From: Bernhard Kölbl <besentv@gmail.com>
Signed-off-by: Bernhard Kölbl <besentv@gmail.com>
---
 dlls/windows.media.speech/private.h    |   2 +
 dlls/windows.media.speech/recognizer.c |  23 +++
 dlls/windows.media.speech/unixlib.h    |  17 ++
 dlls/windows.media.speech/vosk.c       | 240 +++++++++++++++++++++++++
 4 files changed, 282 insertions(+)
diff --git a/dlls/windows.media.speech/private.h b/dlls/windows.media.speech/private.h index 2f804fbf1a7..873c767f814 100644 --- a/dlls/windows.media.speech/private.h +++ b/dlls/windows.media.speech/private.h @@ -47,6 +47,8 @@
#include "wine/list.h"
+#define SPERR_WINRT_INTERNAL_ERROR 0x800455a0 + /* * * Windows.Media.SpeechRecognition diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index c2f386206b8..e2663658d85 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -25,6 +25,9 @@
#include "wine/debug.h"
+#include "unixlib.h" +#include "wine/unixlib.h" + WINE_DEFAULT_DEBUG_CHANNEL(speech);
/* @@ -171,6 +174,8 @@ struct session IAudioCaptureClient *capture_client; WAVEFORMATEX capture_wfx;
+ vosk_handle vosk_handle; + HANDLE worker_thread, worker_control_event, audio_buf_event; BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; @@ -318,7 +323,9 @@ static ULONG WINAPI session_AddRef( ISpeechContinuousRecognitionSession *iface ) static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface ) { struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + struct vosk_release_params vosk_release_params; ULONG ref = InterlockedDecrement(&impl->ref); + TRACE("iface %p, ref %lu.\n", iface, ref);
if (!ref) @@ -344,6 +351,9 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface impl->cs.DebugInfo->Spare[0] = 0; DeleteCriticalSection(&impl->cs);
+ vosk_release_params.vosk_handle = impl->vosk_handle; + WINE_UNIX_CALL(unix_vosk_release, &vosk_release_params); + IVector_ISpeechRecognitionConstraint_Release(impl->constraints); free(impl); } @@ -1083,6 +1093,7 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface { struct recognizer *impl; struct session *session; + struct vosk_create_params vosk_create_params; struct vector_iids constraints_iids = { .iterable = &IID_IIterable_ISpeechRecognitionConstraint, @@ -1125,6 +1136,18 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface if (FAILED(hr = recognizer_factory_create_audio_capture(session))) goto error;
+ if (!(vosk_create_params.locale_len = GetUserDefaultLocaleName(vosk_create_params.locale, LOCALE_NAME_MAX_LENGTH))) + goto error; + + vosk_create_params.sample_rate = (float)session->capture_wfx.nSamplesPerSec; + if (WINE_UNIX_CALL(unix_vosk_create, &vosk_create_params) != STATUS_SUCCESS) + { + hr = SPERR_WINRT_INTERNAL_ERROR; + goto error; + } + + session->vosk_handle = vosk_create_params.vosk_handle; + InitializeCriticalSection(&session->cs); session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs");
diff --git a/dlls/windows.media.speech/unixlib.h b/dlls/windows.media.speech/unixlib.h index a263f36586c..91dc3a4d9af 100644 --- a/dlls/windows.media.speech/unixlib.h +++ b/dlls/windows.media.speech/unixlib.h @@ -30,9 +30,26 @@
#include "wine/unixlib.h"
+typedef UINT64 vosk_handle; /* Carries a VoskRecognizer pointer value; vosk.c converts to and from it with UINT_PTR casts. */
+
+struct vosk_create_params /* Argument block for the unix_vosk_create call. */
+{
+    vosk_handle vosk_handle;              /* out: written by vosk_create() on success */
+    float sample_rate;                    /* in: recognizer.c passes the capture format's nSamplesPerSec */
+    WCHAR locale[LOCALE_NAME_MAX_LENGTH]; /* in: recognizer.c fills this with GetUserDefaultLocaleName() */
+    INT locale_len;                       /* in: the value GetUserDefaultLocaleName() returned for locale */
+};
+
+struct vosk_release_params /* Argument block for the unix_vosk_release call. */
+{
+    vosk_handle vosk_handle; /* in: handle previously returned in vosk_create_params */
+};
+
 enum unix_funcs
 {
     unix_vosk_process_attach,
+    unix_vosk_create,  /* listed in the same position as vosk_create in the tables in vosk.c */
+    unix_vosk_release, /* listed in the same position as vosk_release in the tables in vosk.c */
 };
#endif diff --git a/dlls/windows.media.speech/vosk.c b/dlls/windows.media.speech/vosk.c index 70dd495dc9f..f451bd30430 100644 --- a/dlls/windows.media.speech/vosk.c +++ b/dlls/windows.media.speech/vosk.c @@ -24,8 +24,16 @@
#include "config.h"
+#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + #include <stdarg.h> +#include <dirent.h> #include <dlfcn.h> +#include <errno.h>
#ifdef HAVE_VOSK_API_H #include <vosk_api.h> @@ -44,6 +52,13 @@ WINE_DEFAULT_DEBUG_CHANNEL(speech);
#ifdef SONAME_LIBVOSK
+#define MAKE_FUNCPTR( f ) static typeof(f) * p_##f;
+MAKE_FUNCPTR(vosk_model_new) /* Each expansion declares a file-local pointer named p_<f> of type typeof(f) *.
+                              * vosk_process_attach() fills these pointers in with dlsym(). */
+MAKE_FUNCPTR(vosk_recognizer_new)
+MAKE_FUNCPTR(vosk_model_free)
+MAKE_FUNCPTR(vosk_recognizer_free)
+#undef MAKE_FUNCPTR
+
 static void *libvosk_handle;
static NTSTATUS vosk_process_attach( void *args ) @@ -56,6 +71,225 @@ static NTSTATUS vosk_process_attach( void *args ) return STATUS_DLL_NOT_FOUND; }
+    /* Resolve every libvosk entry point used in this file. If any symbol is
+     * missing, unload the library again and report the failure. */
+#define LOAD_FUNCPTR( f ) \
+    if(!(p_##f = dlsym(libvosk_handle, #f))) \
+    { \
+        ERR("Failed to load symbol %s\n", debugstr_a(#f)); \
+        goto error; \
+    }
+    LOAD_FUNCPTR(vosk_model_new)
+    LOAD_FUNCPTR(vosk_recognizer_new)
+    LOAD_FUNCPTR(vosk_model_free)
+    LOAD_FUNCPTR(vosk_recognizer_free)
+#undef LOAD_FUNCPTR
+
+    return STATUS_SUCCESS;
+error:
+    dlclose(libvosk_handle);
+    libvosk_handle = NULL;
+    return STATUS_ENTRYPOINT_NOT_FOUND;
+}
+
+/* Convert a recognizer pointer into the integer handle stored in the call parameter structs. */
+static inline vosk_handle vosk_recognizer_to_handle( VoskRecognizer *recognizer )
+{
+    return (vosk_handle)(UINT_PTR)recognizer;
+}
+
+/* Inverse of vosk_recognizer_to_handle(): recover the recognizer pointer from a handle. */
+static inline VoskRecognizer *vosk_recognizer_from_handle( vosk_handle handle )
+{
+    return (VoskRecognizer *)(UINT_PTR)handle;
+}
+
+/* Lower-case a NUL-terminated string in place. A NULL str is a no-op. */
+static inline void str_to_lower( char *str )
+{
+    char *s;
+
+    /* tolower() is only defined for EOF and values representable as unsigned
+     * char, so a plain (possibly negative) char must be converted first. */
+    for (s = str; s && *s; ++s)
+        *s = tolower((unsigned char)*s);
+}
+
+/* Map a Unix errno value to an NTSTATUS code. Values without a dedicated
+ * mapping are reported with a FIXME and become STATUS_UNSUCCESSFUL. */
+static NTSTATUS errno_to_status( int err )
+{
+    TRACE("errno %d.\n", err);
+
+    switch (err)
+    {
+    case EAGAIN:    return STATUS_SHARING_VIOLATION;
+    case EBADF:     return STATUS_INVALID_HANDLE;
+    case EBUSY:     return STATUS_DEVICE_BUSY;
+    case ENOSPC:    return STATUS_DISK_FULL;
+    case EPERM:
+    case EROFS:
+    case EACCES:    return STATUS_ACCESS_DENIED;
+    case ENOTDIR:   return STATUS_OBJECT_PATH_NOT_FOUND;
+    case ENOENT:    return STATUS_OBJECT_NAME_NOT_FOUND;
+    case EISDIR:    return STATUS_INVALID_DEVICE_REQUEST;
+    case EMFILE:
+    case ENFILE:    return STATUS_TOO_MANY_OPENED_FILES;
+    case EINVAL:    return STATUS_INVALID_PARAMETER;
+    case ENOTEMPTY: return STATUS_DIRECTORY_NOT_EMPTY;
+    case EPIPE:     return STATUS_PIPE_DISCONNECTED;
+    case EIO:       return STATUS_DEVICE_NOT_READY;
+#ifdef ENOMEDIUM
+    case ENOMEDIUM: return STATUS_NO_MEDIA_IN_DEVICE;
+#endif
+    case ENXIO:     return STATUS_NO_SUCH_DEVICE;
+    case ENOTTY:
+    case EOPNOTSUPP:return STATUS_NOT_SUPPORTED;
+    case ECONNRESET:return STATUS_PIPE_DISCONNECTED;
+    case EFAULT:    return STATUS_ACCESS_VIOLATION;
+    case ESPIPE:    return STATUS_ILLEGAL_FUNCTION;
+    case ELOOP:     return STATUS_REPARSE_POINT_NOT_RESOLVED;
+#ifdef ETIME /* Missing on FreeBSD */
+    case ETIME:     return STATUS_IO_TIMEOUT;
+#endif
+    case ENOEXEC:   /* ?? */
+    case EEXIST:    /* ?? */
+    default:
+        FIXME("Converting errno %d to STATUS_UNSUCCESSFUL\n", err);
+        return STATUS_UNSUCCESSFUL;
+    }
+}
+
+/*
+ * Look in the directory `path` for a Vosk model that fits `locale` and load it.
+ *
+ * Candidates are sub-directories named "vosk-model-small-<tag>..." or
+ * "vosk-model-<tag>...". The lower-cased first five characters of `locale`
+ * (e.g. "en-us") are compared against <tag>. Directories matching all five
+ * characters are tried first; only if none of them loads are directories
+ * matching just the two-letter language tried.
+ *
+ * path    Directory to scan; NULL is rejected with STATUS_UNSUCCESSFUL.
+ * locale  Locale name, at least four characters long.
+ * model   Receives the loaded model, or NULL when nothing was loaded.
+ *
+ * Returns STATUS_SUCCESS when a model was loaded, the translated errno when
+ * the directory cannot be opened, STATUS_MEMORY_NOT_ALLOCATED on allocation
+ * failure and STATUS_UNSUCCESSFUL otherwise.
+ */
+static NTSTATUS find_model_by_locale_and_path( const char *path, const char *locale, VoskModel **model )
+{
+    static const char *vosk_model_identifier_small = "vosk-model-small-";
+    static const char *vosk_model_identifier = "vosk-model-";
+    char lang_region[6], *dir_name, *model_path;
+    NTSTATUS status = STATUS_UNSUCCESSFUL;
+    struct dirent *dirent;
+    size_t len, path_len;
+    bool full_match;
+    int pass;
+    DIR *dir;
+
+    /* path is NULL when the caller passes an unset environment variable. */
+    TRACE("path %s, locale %s, model %p.\n", debugstr_a(path), debugstr_a(locale), model);
+
+    if (!path || !locale || !model || strlen(locale) < 4)
+        return STATUS_UNSUCCESSFUL;
+
+    lstrcpynA(lang_region, locale, 6);
+    str_to_lower(lang_region);
+
+    *model = NULL;
+    path_len = strlen(path);
+
+    if ((dir = opendir(path)) == NULL)
+        return errno_to_status(errno);
+
+    /* Pass 0 accepts only a language + region match (en-us),
+     * pass 1 accepts the remaining language-only matches (en). */
+    for (pass = 0; pass < 2; ++pass)
+    {
+        if (pass) rewinddir(dir);
+
+        while ((dirent = readdir(dir)))
+        {
+            /* NOTE(review): entries reported as DT_UNKNOWN or DT_LNK are skipped
+             * here - confirm that is acceptable for the supported file systems. */
+            if (dirent->d_type != DT_DIR)
+                continue;
+
+            if (!strcmp(dir_name = dirent->d_name, ".."))
+                continue;
+
+            len = strlen(vosk_model_identifier_small);
+            if (strncmp(dir_name, vosk_model_identifier_small, len))
+            {
+                len = strlen(vosk_model_identifier);
+                if (strncmp(dir_name, vosk_model_identifier, len))
+                    continue;
+            }
+            dir_name += len;
+
+            full_match = !strncmp(dir_name, lang_region, 5);
+            if (pass == 0 ? !full_match : (full_match || strncmp(dir_name, lang_region, 2)))
+                continue;
+
+            if (!(model_path = malloc(path_len + 1 /* '/' */ + strlen(dirent->d_name) + 1)))
+            {
+                /* Leave through the common exit so the directory stream is closed. */
+                status = STATUS_MEMORY_NOT_ALLOCATED;
+                goto done;
+            }
+
+            sprintf(model_path, "%s/%s", path, dirent->d_name);
+
+            TRACE("Trying to load Vosk model %s.\n", debugstr_a(model_path));
+
+            *model = p_vosk_model_new(model_path);
+            free(model_path);
+
+            if (*model)
+            {
+                status = STATUS_SUCCESS;
+                goto done;
+            }
+        }
+    }
+
+done:
+    closedir(dir);
+
+    return status;
+}
+
+/*
+ * Load a Vosk model for `locale`, searching these directories in order:
+ *   1. $VOSK_MODEL_PATH
+ *   2. /usr/share/vosk
+ *   3. $XDG_CACHE_HOME/vosk, or $HOME/.cache/vosk when XDG_CACHE_HOME is unset
+ *
+ * Returns STATUS_SUCCESS with *model set as soon as one directory yields a
+ * model. Otherwise returns the status of the last directory searched, or
+ * STATUS_UNSUCCESSFUL when the arguments are NULL or no third directory
+ * could be determined.
+ */
+static NTSTATUS get_model_by_locale( const char *locale, VoskModel **model )
+{
+    NTSTATUS status = STATUS_UNSUCCESSFUL;
+    const char *suffix = NULL;
+    char *env, *path;
+
+    TRACE("locale %s, model %p.\n", debugstr_a(locale), model);
+
+    if (!locale || !model)
+        return STATUS_UNSUCCESSFUL;
+
+    if (find_model_by_locale_and_path(getenv("VOSK_MODEL_PATH"), locale, model) == STATUS_SUCCESS)
+        return STATUS_SUCCESS;
+    if (find_model_by_locale_and_path("/usr/share/vosk", locale, model) == STATUS_SUCCESS)
+        return STATUS_SUCCESS;
+
+    if ((env = getenv("XDG_CACHE_HOME")))
+        suffix = "/vosk";
+    else if ((env = getenv("HOME")))
+        suffix = "/.cache/vosk";
+
+    if (suffix && (path = malloc(strlen(env) + strlen(suffix) + 1)))
+    {
+        sprintf(path, "%s%s", env, suffix);
+        status = find_model_by_locale_and_path(path, locale, model);
+        free(path);
+    }
+
+    return status;
+}
+
+/*
+ * unix_vosk_create entry point. `args` is a struct vosk_create_params.
+ *
+ * Converts params->locale with ntdll_wcstoumbs(), loads a matching model and
+ * creates a recognizer for params->sample_rate. On success the recognizer is
+ * returned in params->vosk_handle.
+ *
+ * Returns STATUS_SUCCESS, the status of the model search when no model was
+ * found, or STATUS_UNSUCCESSFUL when the locale conversion or the recognizer
+ * creation fails.
+ */
+static NTSTATUS vosk_create( void *args )
+{
+    struct vosk_create_params *params = args;
+    char locale[LOCALE_NAME_MAX_LENGTH];
+    VoskRecognizer *recognizer = NULL;
+    VoskModel *model = NULL;
+    NTSTATUS status;
+
+    TRACE("args %p.\n", args);
+
+    if(!ntdll_wcstoumbs(params->locale, params->locale_len, locale, LOCALE_NAME_MAX_LENGTH, FALSE))
+        return STATUS_UNSUCCESSFUL;
+
+    if ((status = get_model_by_locale(locale, &model)) != STATUS_SUCCESS)
+    {
+        ERR("No suitable Vosk model was found for locale %s. Speech recognition won't work.\n", debugstr_a(locale));
+        return status;
+    }
+
+    if (!(recognizer = p_vosk_recognizer_new(model, params->sample_rate)))
+    {
+        p_vosk_model_free(model);
+        return STATUS_UNSUCCESSFUL;
+    }
+
+    /* The model is kept alive inside the recognizer, so we can safely free our ref here. */
+    p_vosk_model_free(model);
+
+    params->vosk_handle = vosk_recognizer_to_handle(recognizer);
+    return STATUS_SUCCESS;
+}
+
+/*
+ * unix_vosk_release entry point. `args` is a struct vosk_release_params.
+ * Frees the recognizer behind params->vosk_handle and always returns
+ * STATUS_SUCCESS.
+ */
+static NTSTATUS vosk_release( void *args )
+{
+    struct vosk_release_params *params = args;
+
+    TRACE("args %p.\n", args);
+
+    /* A zero handle converts to a NULL recognizer, so there is nothing to free. */
+    if (params->vosk_handle)
+        p_vosk_recognizer_free(vosk_recognizer_from_handle(params->vosk_handle));
+    return STATUS_SUCCESS;
 }
@@ -69,6 +303,8 @@ static NTSTATUS vosk_process_attach( void *args ) }
MAKE_UNSUPPORTED_FUNC(vosk_process_attach) +MAKE_UNSUPPORTED_FUNC(vosk_create) +MAKE_UNSUPPORTED_FUNC(vosk_release) #undef MAKE_UNSUPPORTED_FUNC
 #endif /* SONAME_LIBVOSK */
@@ -76,9 +312,13 @@ MAKE_UNSUPPORTED_FUNC(vosk_process_attach)
 unixlib_entry_t __wine_unix_call_funcs[] =
 {
     vosk_process_attach,
+    vosk_create,  /* same position as unix_vosk_create in enum unix_funcs (unixlib.h) */
+    vosk_release, /* same position as unix_vosk_release in enum unix_funcs (unixlib.h) */
 };
 
 unixlib_entry_t __wine_unix_call_wow64_funcs[] =
 {
     vosk_process_attach,
+    vosk_create,  /* Same entry point as in the table above. The parameter structs in unixlib.h
+                   * contain no pointer members - presumably why no wow64 thunk is needed; confirm. */
+    vosk_release, /* same entry point as in the table above */
 };