From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com
---
 dlls/windows.media.speech/recognizer.c |  16 ++
 dlls/windows.media.speech/unixlib.h    |  17 +-
 dlls/windows.media.speech/vosk.c       | 240 ++++++++++++++++++++++++-
 3 files changed, 271 insertions(+), 2 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index c2f386206b8..f37e13d96cc 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -25,6 +25,9 @@
#include "wine/debug.h"
+#include "unixlib.h" +#include "wine/unixlib.h" + WINE_DEFAULT_DEBUG_CHANNEL(speech);
/* @@ -171,6 +174,8 @@ struct session IAudioCaptureClient *capture_client; WAVEFORMATEX capture_wfx;
+ vosk_instance vosk_instance; + HANDLE worker_thread, worker_control_event, audio_buf_event; BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; @@ -318,7 +323,9 @@ static ULONG WINAPI session_AddRef( ISpeechContinuousRecognitionSession *iface ) static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface ) { struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + struct vosk_release_params vosk_release_params; ULONG ref = InterlockedDecrement(&impl->ref); + TRACE("iface %p, ref %lu.\n", iface, ref);
if (!ref) @@ -344,6 +351,9 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface impl->cs.DebugInfo->Spare[0] = 0; DeleteCriticalSection(&impl->cs);
+ vosk_release_params.instance = impl->vosk_instance; + WINE_UNIX_CALL(unix_vosk_release, &vosk_release_params); + IVector_ISpeechRecognitionConstraint_Release(impl->constraints); free(impl); } @@ -1083,6 +1093,7 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface { struct recognizer *impl; struct session *session; + struct vosk_create_params vosk_create_params; struct vector_iids constraints_iids = { .iterable = &IID_IIterable_ISpeechRecognitionConstraint, @@ -1125,6 +1136,11 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface if (FAILED(hr = recognizer_factory_create_audio_capture(session))) goto error;
+ vosk_create_params.sample_rate = (float)session->capture_wfx.nSamplesPerSec; + vosk_create_params.instance = &session->vosk_instance; + if (FAILED(hr = HRESULT_FROM_NT(WINE_UNIX_CALL(unix_vosk_create, &vosk_create_params)))) + goto error; + InitializeCriticalSection(&session->cs); session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs");
diff --git a/dlls/windows.media.speech/unixlib.h b/dlls/windows.media.speech/unixlib.h index daf45e01258..f0edf8c883c 100644 --- a/dlls/windows.media.speech/unixlib.h +++ b/dlls/windows.media.speech/unixlib.h @@ -30,9 +30,24 @@
#include "wine/unixlib.h"
+/* Fixed-width handle for a Vosk recognizer; vosk.c converts it to and from
+ * a VoskRecognizer pointer through UINT_PTR. */
+typedef UINT64 vosk_instance;
+
+/* Arguments for unix_vosk_create. */
+struct vosk_create_params
+{
+    vosk_instance *instance; /* out: written by vosk_create with the new handle */
+    float sample_rate;       /* in: handed to the recognizer constructor */
+};
+
+/* Arguments for unix_vosk_release. */
+struct vosk_release_params
+{
+    vosk_instance instance;  /* in: handle to free; vosk_release rejects 0 */
+};
+
+/* Function codes passed to WINE_UNIX_CALL.
+ * NOTE(review): presumably these must stay in the same order as the entry
+ * tables at the end of vosk.c - confirm when adding a code. */
 enum unix_funcs
 {
-    unix_vosk_process_attach
+    unix_vosk_process_attach,
+    unix_vosk_create,
+    unix_vosk_release,
 };
#endif diff --git a/dlls/windows.media.speech/vosk.c b/dlls/windows.media.speech/vosk.c index d3591d9b809..ffeba51b9b9 100644 --- a/dlls/windows.media.speech/vosk.c +++ b/dlls/windows.media.speech/vosk.c @@ -24,8 +24,16 @@
#include "config.h"
+#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + #include <stdarg.h> +#include <dirent.h> #include <dlfcn.h> +#include <errno.h>
#ifdef HAVE_VOSK_API_H #include <vosk_api.h> @@ -40,7 +48,16 @@
#include "unixlib.h"
-WINE_DEFAULT_DEBUG_CHANNEL(vosk);
+WINE_DEFAULT_DEBUG_CHANNEL(speech);
 
 #ifdef HAVE_VOSK_API_H
 
+/*
+ * libvosk entry points, resolved with dlsym() in vosk_process_attach().
+ * Each pointer is named p<symbol> and takes its type from the prototype in
+ * vosk_api.h, so the declarations must live inside the HAVE_VOSK_API_H guard.
+ * They are placed within the file's existing guard rather than behind a
+ * second #ifdef, which would have no matching #endif.
+ */
+#define MAKE_FUNCPTR( f ) static typeof(f) * p##f;
+MAKE_FUNCPTR(vosk_model_new)
+MAKE_FUNCPTR(vosk_recognizer_new)
+MAKE_FUNCPTR(vosk_model_free)
+MAKE_FUNCPTR(vosk_recognizer_free)
+#undef MAKE_FUNCPTR
+
@@ -59,12 +76,227 @@ static NTSTATUS vosk_process_attach( void *args ) return STATUS_DLL_NOT_FOUND; }
+#define LOAD_FUNCPTR( f ) \ + do if((p##f = dlsym(libvosk_handle, #f)) == NULL) \ + { \ + ERR("Failed to load symbol %s\n", #f); \ + goto error; \ + } while(0) + LOAD_FUNCPTR(vosk_model_new); + LOAD_FUNCPTR(vosk_recognizer_new); + LOAD_FUNCPTR(vosk_model_free); + LOAD_FUNCPTR(vosk_recognizer_free); +#undef LOAD_FUNCPTR return STATUS_SUCCESS; +error: + dlclose(libvosk_handle); + libvosk_handle = NULL; + return STATUS_ENTRYPOINT_NOT_FOUND; #else /* SONAME_LIBVOSK */ return STATUS_NOT_SUPPORTED; #endif /* SONAME_LIBVOSK */ }
+/* Pack a recognizer pointer into the fixed-width handle handed to callers. */
+static inline vosk_instance to_vosk_instance( VoskRecognizer *ptr )
+{
+    return (vosk_instance)(UINT_PTR)ptr;
+}
+
+/* Recover the recognizer pointer from a handle made by to_vosk_instance(). */
+static inline VoskRecognizer *from_vosk_instance( vosk_instance instance )
+{
+    return (VoskRecognizer *)(UINT_PTR)instance;
+}
+
+/* Lower-case a NUL-terminated string in place. A NULL pointer is ignored. */
+static inline void str_to_lower(char *str)
+{
+    char *s;
+
+    /* tolower() is only defined for EOF and unsigned char values, so a plain
+     * (possibly negative) char must be converted first. */
+    for (s = str; s && *s; ++s)
+        *s = tolower((unsigned char)*s);
+}
+
+/* Map an errno value to an NTSTATUS code. Values without a dedicated mapping
+ * are reported through FIXME and become STATUS_UNSUCCESSFUL. */
+static NTSTATUS errno_to_status( int err )
+{
+    TRACE("errno = %d\n", err);
+    switch (err)
+    {
+    case EAGAIN:    return STATUS_SHARING_VIOLATION;
+    case EBADF:     return STATUS_INVALID_HANDLE;
+    case EBUSY:     return STATUS_DEVICE_BUSY;
+    case ENOSPC:    return STATUS_DISK_FULL;
+    case EPERM:
+    case EROFS:
+    case EACCES:    return STATUS_ACCESS_DENIED;
+    case ENOTDIR:   return STATUS_OBJECT_PATH_NOT_FOUND;
+    case ENOENT:    return STATUS_OBJECT_NAME_NOT_FOUND;
+    case EISDIR:    return STATUS_INVALID_DEVICE_REQUEST;
+    case EMFILE:
+    case ENFILE:    return STATUS_TOO_MANY_OPENED_FILES;
+    case EINVAL:    return STATUS_INVALID_PARAMETER;
+    case ENOTEMPTY: return STATUS_DIRECTORY_NOT_EMPTY;
+    case EPIPE:     return STATUS_PIPE_DISCONNECTED;
+    case EIO:       return STATUS_DEVICE_NOT_READY;
+#ifdef ENOMEDIUM
+    case ENOMEDIUM: return STATUS_NO_MEDIA_IN_DEVICE;
+#endif
+    case ENXIO:     return STATUS_NO_SUCH_DEVICE;
+    case ENOTTY:
+    case EOPNOTSUPP:return STATUS_NOT_SUPPORTED;
+    case ECONNRESET:return STATUS_PIPE_DISCONNECTED;
+    case EFAULT:    return STATUS_ACCESS_VIOLATION;
+    case ESPIPE:    return STATUS_ILLEGAL_FUNCTION;
+    case ELOOP:     return STATUS_REPARSE_POINT_NOT_RESOLVED;
+#ifdef ETIME /* Missing on FreeBSD */
+    case ETIME:     return STATUS_IO_TIMEOUT;
+#endif
+    case ENOEXEC:   /* ?? */
+    case EEXIST:    /* ?? */
+    default:
+        FIXME("Converting errno %d to STATUS_UNSUCCESSFUL\n", err);
+        return STATUS_UNSUCCESSFUL;
+    }
+}
+
+/*
+ * Look through the sub-directories of folder for a Vosk model matching lcid
+ * and load the first one that pvosk_model_new() accepts.
+ *
+ * A directory matches when its name, after an optional "vosk-model-small-"
+ * or "vosk-model-" prefix, starts with the lower-cased first five characters
+ * of lcid (language and region) or with its first two (language only).
+ *
+ * Returns STATUS_SUCCESS with *model set to the loaded model.  Returns
+ * STATUS_UNSUCCESSFUL for invalid arguments (*model untouched) or when no
+ * directory yields a model, a status derived from errno when folder cannot
+ * be opened, and STATUS_NO_MEMORY when the path buffer cannot be allocated;
+ * in those three cases *model is NULL.
+ */
+static NTSTATUS find_model_by_lang_and_path(const char *folder, const char *lcid, VoskModel **model)
+{
+    static const char *vosk_model_identifier_small = "vosk-model-small-";
+    static const char *vosk_model_identifier = "vosk-model-";
+    char lang[3], lang_region[6], *dir_name, *path;
+    NTSTATUS status = STATUS_UNSUCCESSFUL;
+    struct dirent *dirent;
+    DIR *dir;
+
+    /* folder comes straight from getenv() in one caller and may be NULL, so
+     * it must not be handed to a bare %s. */
+    TRACE("folder %s, lcid %s, model %p.\n", debugstr_a(folder), debugstr_a(lcid), model);
+
+    if (!folder || !lcid || !model || strlen(lcid) < 4)
+        return STATUS_UNSUCCESSFUL;
+
+    /* lstrcpynA copies at most n - 1 characters and always terminates. */
+    lstrcpynA(lang, lcid, 3);
+    lstrcpynA(lang_region, lcid, 6);
+
+    str_to_lower(lang);
+    str_to_lower(lang_region);
+
+    *model = NULL;
+
+    if ((dir = opendir(folder)) == NULL)
+        return errno_to_status(errno);
+
+    while ((dirent = readdir(dir)))
+    {
+        /* NOTE(review): readdir(3) allows d_type to be DT_UNKNOWN on file
+         * systems that do not report it; such entries, and symlinks to
+         * directories, are skipped here. */
+        if (dirent->d_type != DT_DIR)
+            continue;
+
+        if (!strcmp(dir_name = dirent->d_name, ".."))
+            continue;
+
+        /* Strip the identifier only when the name really starts with it; the
+         * longer identifier is tested first because the shorter one is its
+         * prefix. */
+        if (strstr(dir_name, vosk_model_identifier_small) == dir_name)
+            dir_name += strlen(vosk_model_identifier_small);
+        else if (strstr(dir_name, vosk_model_identifier) == dir_name)
+            dir_name += strlen(vosk_model_identifier);
+
+        if (strstr(dir_name, lang_region) != dir_name && strstr(dir_name, lang) != dir_name)
+            continue;
+
+        path = malloc(strlen(folder) + 1 /* '/' */ + strlen(dirent->d_name) + 1);
+        if (!path)
+        {
+            status = STATUS_NO_MEMORY;
+            break;
+        }
+        sprintf(path, "%s/%s", folder, dirent->d_name);
+
+        TRACE("Trying to load Vosk model %s.\n", debugstr_a(path));
+
+        *model = pvosk_model_new(path);
+        free(path);
+
+        if (*model)
+        {
+            status = STATUS_SUCCESS;
+            break;
+        }
+    }
+
+    closedir(dir);
+
+    return status;
+}
+
+/*
+ * Find and load a Vosk model for lcid, trying in order: $VOSK_MODEL_PATH,
+ * /usr/share/vosk, then $XDG_CACHE_HOME/vosk or, when XDG_CACHE_HOME is not
+ * set, $HOME/.cache/vosk.
+ *
+ * Returns STATUS_SUCCESS with *model set, otherwise the status of the last
+ * location tried (STATUS_UNSUCCESSFUL when lcid or model is NULL, or when
+ * neither XDG_CACHE_HOME nor HOME is set).
+ */
+static NTSTATUS get_model_by_lang(const char *lcid, VoskModel **model)
+{
+    static const char *cache_vosk = "/.cache/vosk";
+    static const char *vosk = "/vosk";
+    const char *base, *suffix;
+    NTSTATUS status;
+    char *path;
+
+    TRACE("lcid %s, model %p.\n", debugstr_a(lcid), model);
+
+    if (!lcid || !model)
+        return STATUS_UNSUCCESSFUL;
+
+    if (find_model_by_lang_and_path(getenv("VOSK_MODEL_PATH"), lcid, model) == STATUS_SUCCESS)
+        return STATUS_SUCCESS;
+    if (find_model_by_lang_and_path("/usr/share/vosk", lcid, model) == STATUS_SUCCESS)
+        return STATUS_SUCCESS;
+
+    if ((base = getenv("XDG_CACHE_HOME")))
+        suffix = vosk;
+    else if ((base = getenv("HOME")))
+        suffix = cache_vosk;
+    else
+        return STATUS_UNSUCCESSFUL;
+
+    if (!(path = malloc(strlen(base) + strlen(suffix) + 1)))
+        return STATUS_NO_MEMORY;
+    sprintf(path, "%s%s", base, suffix);
+
+    status = find_model_by_lang_and_path(path, lcid, model);
+    free(path);
+
+    return status;
+}
+
+/*
+ * unix_vosk_create: create a recognizer for the locale named by LC_NAME at
+ * params->sample_rate and store its handle in *params->instance.
+ *
+ * On any failure *params->instance is set to 0, so the caller never keeps an
+ * indeterminate handle.
+ *
+ * NOTE(review): when LC_NAME is unset, getenv() returns NULL and
+ * get_model_by_lang() fails with STATUS_UNSUCCESSFUL; confirm that LC_NAME
+ * is the intended variable.
+ */
+static NTSTATUS vosk_create( void *args )
+{
+    struct vosk_create_params *params = args;
+    VoskRecognizer *recognizer;
+    VoskModel *model = NULL;
+    NTSTATUS status;
+
+    TRACE("args %p.\n", args);
+
+    *params->instance = to_vosk_instance( NULL );
+
+    if ((status = get_model_by_lang(getenv("LC_NAME"), &model)) != STATUS_SUCCESS)
+        return status;
+
+    recognizer = pvosk_recognizer_new(model, params->sample_rate);
+
+    /* The model is kept alive inside the recognizer, so we can safely free our
+     * ref here; on failure this is simply the cleanup of the unused model. */
+    pvosk_model_free(model);
+
+    if (!recognizer)
+        return STATUS_UNSUCCESSFUL;
+
+    *params->instance = to_vosk_instance(recognizer);
+    return STATUS_SUCCESS;
+}
+
+/* unix_vosk_release: free the recognizer behind params->instance.
+ * Returns STATUS_UNSUCCESSFUL when the handle is 0. */
+static NTSTATUS vosk_release(void *args)
+{
+    struct vosk_release_params *params = args;
+
+    TRACE("args %p.\n", args);
+
+    if (!params->instance)
+        return STATUS_UNSUCCESSFUL;
+
+    pvosk_recognizer_free(from_vosk_instance(params->instance));
+
+    return STATUS_SUCCESS;
+}
+
 #else /* HAVE_VOSK_API_H */
#define MAKE_UNSUPPORTED_FUNC( f ) \ @@ -75,6 +307,8 @@ static NTSTATUS vosk_process_attach( void *args ) }
MAKE_UNSUPPORTED_FUNC(vosk_process_attach) +MAKE_UNSUPPORTED_FUNC(vosk_create) +MAKE_UNSUPPORTED_FUNC(vosk_release) #undef MAKE_UNSUPPORTED_FUNC
 #endif /* HAVE_VOSK_API_H */
@@ -82,9 +316,13 @@ MAKE_UNSUPPORTED_FUNC(vosk_process_attach)
+/* Entry table for unix calls; entries are listed in the same order as
+ * enum unix_funcs in unixlib.h. */
 unixlib_entry_t __wine_unix_call_funcs[] =
 {
     vosk_process_attach,
+    vosk_create,
+    vosk_release,
 };
 
+/* NOTE(review): this table reuses the native entry points unchanged, yet
+ * struct vosk_create_params carries a pointer member whose size presumably
+ * differs for a 32-bit caller - confirm whether vosk_create needs a wow64
+ * thunk. */
 unixlib_entry_t __wine_unix_call_wow64_funcs[] =
 {
     vosk_process_attach,
+    vosk_create,
+    vosk_release,
 };