From: Bernhard Kölbl <besentv@gmail.com>
Signed-off-by: Bernhard Kölbl <besentv@gmail.com>
---
 dlls/windows.media.speech/Makefile.in  |   5 +-
 dlls/windows.media.speech/private.h    |   3 +
 dlls/windows.media.speech/recognizer.c |  42 ++++
 dlls/windows.media.speech/unixlib.h    |  20 ++
 dlls/windows.media.speech/vosk.c       | 268 +++++++++++++++++++++++++
 5 files changed, 336 insertions(+), 2 deletions(-)
 create mode 100644 dlls/windows.media.speech/vosk.c
diff --git a/dlls/windows.media.speech/Makefile.in b/dlls/windows.media.speech/Makefile.in index c06a142780b..7a7f9711799 100644 --- a/dlls/windows.media.speech/Makefile.in +++ b/dlls/windows.media.speech/Makefile.in @@ -1,6 +1,6 @@ MODULE = windows.media.speech.dll UNIXLIB = windows.media.speech.so -IMPORTS = combase uuid +IMPORTS = combase uuid user32 UNIX_LIBS = $(VOSK_LIBS)
C_SRCS = \ @@ -10,6 +10,7 @@ C_SRCS = \ main.c \ recognizer.c \ synthesizer.c \ - vector.c + vector.c \ + vosk.c
IDL_SRCS = classes.idl diff --git a/dlls/windows.media.speech/private.h b/dlls/windows.media.speech/private.h index 2f804fbf1a7..62952478bdf 100644 --- a/dlls/windows.media.speech/private.h +++ b/dlls/windows.media.speech/private.h @@ -31,6 +31,7 @@ #include "windef.h" #include "winbase.h" #include "winstring.h" +#include "winuser.h" #include "objbase.h"
#include "activation.h" @@ -47,6 +48,8 @@
#include "wine/list.h"
+#define SPERR_WINRT_INTERNAL_ERROR 0x800455a0 + /* * * Windows.Media.SpeechRecognition diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index c2f386206b8..ff23acc2720 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -25,6 +25,9 @@
#include "wine/debug.h"
+#include "unixlib.h" +#include "wine/unixlib.h" + WINE_DEFAULT_DEBUG_CHANNEL(speech);
/* @@ -171,6 +174,8 @@ struct session IAudioCaptureClient *capture_client; WAVEFORMATEX capture_wfx;
+ vosk_handle vosk_handle; + HANDLE worker_thread, worker_control_event, audio_buf_event; BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; @@ -318,7 +323,9 @@ static ULONG WINAPI session_AddRef( ISpeechContinuousRecognitionSession *iface ) static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface ) { struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + struct vosk_release_params vosk_release_params; ULONG ref = InterlockedDecrement(&impl->ref); + TRACE("iface %p, ref %lu.\n", iface, ref);
if (!ref) @@ -344,6 +351,9 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface impl->cs.DebugInfo->Spare[0] = 0; DeleteCriticalSection(&impl->cs);
+ vosk_release_params.vosk_handle = impl->vosk_handle; + WINE_UNIX_CALL(unix_vosk_release, &vosk_release_params); + IVector_ISpeechRecognitionConstraint_Release(impl->constraints); free(impl); } @@ -1079,6 +1089,35 @@ cleanup: return hr; }
+static HRESULT recognizer_factory_create_vosk_instance(struct session *session) +{ + struct vosk_create_params vosk_create_params = { 0 }; + WCHAR locale[LOCALE_NAME_MAX_LENGTH]; + NTSTATUS status; + INT len; + + if (!(len = GetUserDefaultLocaleName(locale, LOCALE_NAME_MAX_LENGTH))) + return E_FAIL; + + if (CharLowerBuffW(locale, len) != len) + return E_FAIL; + + if (!WideCharToMultiByte(CP_ACP, 0, locale, -1, (LPSTR)vosk_create_params.locale, len, NULL, NULL)) + return HRESULT_FROM_WIN32(GetLastError()); + + vosk_create_params.sample_rate = (FLOAT)session->capture_wfx.nSamplesPerSec; + + if ((status = WINE_UNIX_CALL(unix_vosk_create, &vosk_create_params))) + { + ERR("Unable to create Vosk instance for locale %s, status %#lx. Speech recognition won't work.\n", debugstr_a(vosk_create_params.locale), status); + return SPERR_WINRT_INTERNAL_ERROR; + } + + session->vosk_handle = vosk_create_params.vosk_handle; + + return S_OK; +} + static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface, ILanguage *language, ISpeechRecognizer **speechrecognizer ) { struct recognizer *impl; @@ -1125,6 +1164,9 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface if (FAILED(hr = recognizer_factory_create_audio_capture(session))) goto error;
+ if (FAILED(hr = recognizer_factory_create_vosk_instance(session))) + goto error; + InitializeCriticalSection(&session->cs); session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs");
diff --git a/dlls/windows.media.speech/unixlib.h b/dlls/windows.media.speech/unixlib.h index 5516b51d235..5f45dcc0dc9 100644 --- a/dlls/windows.media.speech/unixlib.h +++ b/dlls/windows.media.speech/unixlib.h @@ -30,4 +30,24 @@
#include "wine/unixlib.h"
+typedef UINT64 vosk_handle;
+
+struct vosk_create_params
+{
+    vosk_handle vosk_handle;
+    CHAR locale[LOCALE_NAME_MAX_LENGTH];
+    FLOAT sample_rate;
+};
+
+struct vosk_release_params
+{
+    vosk_handle vosk_handle;
+};
+
+enum unix_funcs
+{
+    unix_vosk_create,
+    unix_vosk_release,
+};
+
 #endif
diff --git a/dlls/windows.media.speech/vosk.c b/dlls/windows.media.speech/vosk.c
new file mode 100644
index 00000000000..3782ec2f46c
--- /dev/null
+++ b/dlls/windows.media.speech/vosk.c
@@ -0,0 +1,289 @@
+/*
+ * Vosk interface for Windows.Media.Speech
+ *
+ * Copyright 2023 Bernhard Kölbl for CodeWeavers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#if 0
+#pragma makedep unix
+#endif
+
+#include "config.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+#include <stdarg.h>
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+
+#ifdef HAVE_VOSK_API_H
+#include <vosk_api.h>
+#endif /* HAVE_VOSK_API_H */
+
+#include "ntstatus.h"
+#define WIN32_NO_STATUS
+#include "winerror.h"
+#include "winternl.h"
+
+#include "wine/debug.h"
+
+#include "unixlib.h"
+
+WINE_DEFAULT_DEBUG_CHANNEL(speech);
+
+#ifdef SONAME_LIBVOSK
+
+/* Pack a native recognizer pointer into the fixed-width vosk_handle type. */
+static inline vosk_handle vosk_recognizer_to_handle( VoskRecognizer *recognizer )
+{
+    return (vosk_handle)(UINT_PTR)recognizer;
+}
+
+/* Recover the native recognizer pointer from a handle made by vosk_recognizer_to_handle(). */
+static inline VoskRecognizer *vosk_recognizer_from_handle( vosk_handle handle )
+{
+    return (VoskRecognizer *)(UINT_PTR)handle;
+}
+
+/* Translate a Unix errno value into an NTSTATUS; unmapped values become STATUS_UNSUCCESSFUL. */
+static NTSTATUS errno_to_status( int err )
+{
+    TRACE("errno %d.\n", err);
+
+    switch (err)
+    {
+    case EAGAIN:    return STATUS_SHARING_VIOLATION;
+    case EBADF:     return STATUS_INVALID_HANDLE;
+    case EBUSY:     return STATUS_DEVICE_BUSY;
+    case ENOSPC:    return STATUS_DISK_FULL;
+    case EPERM:
+    case EROFS:
+    case EACCES:    return STATUS_ACCESS_DENIED;
+    case ENOTDIR:   return STATUS_OBJECT_PATH_NOT_FOUND;
+    case ENOENT:    return STATUS_OBJECT_NAME_NOT_FOUND;
+    case EISDIR:    return STATUS_INVALID_DEVICE_REQUEST;
+    case EMFILE:
+    case ENFILE:    return STATUS_TOO_MANY_OPENED_FILES;
+    case EINVAL:    return STATUS_INVALID_PARAMETER;
+    case ENOTEMPTY: return STATUS_DIRECTORY_NOT_EMPTY;
+    case EPIPE:     return STATUS_PIPE_DISCONNECTED;
+    case EIO:       return STATUS_DEVICE_NOT_READY;
+#ifdef ENOMEDIUM
+    case ENOMEDIUM: return STATUS_NO_MEDIA_IN_DEVICE;
+#endif
+    case ENXIO:     return STATUS_NO_SUCH_DEVICE;
+    case ENOTTY:
+    case EOPNOTSUPP:return STATUS_NOT_SUPPORTED;
+    case ECONNRESET:return STATUS_PIPE_DISCONNECTED;
+    case EFAULT:    return STATUS_ACCESS_VIOLATION;
+    case ESPIPE:    return STATUS_ILLEGAL_FUNCTION;
+    case ELOOP:     return STATUS_REPARSE_POINT_NOT_RESOLVED;
+#ifdef ETIME /* Missing on FreeBSD */
+    case ETIME:     return STATUS_IO_TIMEOUT;
+#endif
+    case ENOEXEC:   /* ?? */
+    case EEXIST:    /* ?? */
+    default:
+        FIXME("Converting errno %d to STATUS_UNSUCCESSFUL.\n", err);
+        return STATUS_UNSUCCESSFUL;
+    }
+}
+
+/*
+ * Scan the directory 'path' for sub-directories whose name starts with
+ * "vosk-model-" or "vosk-model-small-" followed by the locale, and load the
+ * first one that vosk_model_new() accepts into *model.
+ *
+ * Returns STATUS_SUCCESS once a model is loaded, the mapped errno when the
+ * directory cannot be opened, and STATUS_UNSUCCESSFUL when nothing matches.
+ */
+static NTSTATUS find_model_by_locale_and_path( const char *path, const char *locale, VoskModel **model )
+{
+    static const char *vosk_model_identifier_small = "vosk-model-small-";
+    static const char *vosk_model_identifier = "vosk-model-";
+    size_t ident_small_len = strlen(vosk_model_identifier_small);
+    size_t ident_len = strlen(vosk_model_identifier);
+    NTSTATUS status = STATUS_UNSUCCESSFUL;
+    char *dir_name, *model_path;
+    struct dirent *dirent;
+    size_t path_len;
+    DIR *dir;
+
+    TRACE("path %s, locale %s, model %p.\n", debugstr_a(path), debugstr_a(locale), model);
+
+    if (!path || !locale || strlen(locale) < 4)
+        return STATUS_UNSUCCESSFUL;
+
+    if ((dir = opendir(path)) == NULL)
+        return errno_to_status(errno);
+
+    path_len = strlen(path);
+    *model = NULL;
+
+    while ((dirent = readdir(dir)))
+    {
+        /* NOTE(review): d_type may be DT_UNKNOWN on some filesystems; such entries are skipped here. */
+        if (dirent->d_type != DT_DIR)
+            continue;
+
+        if (!strcmp(dir_name = dirent->d_name, ".."))
+            continue;
+
+        if (!strncmp(dir_name, vosk_model_identifier_small, ident_small_len))
+            dir_name += ident_small_len;
+        else if (!strncmp(dir_name, vosk_model_identifier, ident_len))
+            dir_name += ident_len;
+        else
+            continue;
+
+        /* Accept a name matching the lang-region tag (en-us) or at least the language (en). */
+        if (strncmp(dir_name, locale, 5) && strncmp(dir_name, locale, 2))
+            continue;
+
+        if (!(model_path = malloc(path_len + 1 /* '/' */ + strlen(dirent->d_name) + 1)))
+        {
+            status = STATUS_MEMORY_NOT_ALLOCATED;
+            break;
+        }
+
+        sprintf(model_path, "%s/%s", path, dirent->d_name);
+
+        TRACE("Trying to load Vosk model %s.\n", debugstr_a(model_path));
+
+        *model = vosk_model_new(model_path);
+        free(model_path);
+
+        if (*model)
+        {
+            status = STATUS_SUCCESS;
+            break;
+        }
+    }
+
+    closedir(dir);
+
+    return status;
+}
+
+/*
+ * Look for a model matching 'locale' in $VOSK_MODEL_PATH, then /usr/share/vosk,
+ * then $XDG_CACHE_HOME/vosk (or $HOME/.cache/vosk when XDG_CACHE_HOME is unset).
+ */
+static NTSTATUS find_model_by_locale( const char *locale, VoskModel **model )
+{
+    NTSTATUS status = STATUS_UNSUCCESSFUL;
+    const char *suffix = NULL;
+    char *env, *path;
+
+    TRACE("locale %s, model %p.\n", debugstr_a(locale), model);
+
+    if (!model)
+        return STATUS_UNSUCCESSFUL;
+
+    if (!find_model_by_locale_and_path(getenv("VOSK_MODEL_PATH"), locale, model))
+        return STATUS_SUCCESS;
+    if (!find_model_by_locale_and_path("/usr/share/vosk", locale, model))
+        return STATUS_SUCCESS;
+
+    if ((env = getenv("XDG_CACHE_HOME")))
+        suffix = "/vosk";
+    else if ((env = getenv("HOME")))
+        suffix = "/.cache/vosk";
+
+    if (suffix && (path = malloc(strlen(env) + strlen(suffix) + 1)))
+    {
+        sprintf(path, "%s%s", env, suffix);
+        status = find_model_by_locale_and_path(path, locale, model);
+        free(path);
+    }
+
+    return status;
+}
+
+/* Unix call: load a model for params->locale and create a recognizer at
+ * params->sample_rate; on success the handle is stored in params->vosk_handle. */
+static NTSTATUS vosk_create( void *args )
+{
+    struct vosk_create_params *params = args;
+    VoskRecognizer *recognizer = NULL;
+    VoskModel *model = NULL;
+    NTSTATUS status;
+
+    TRACE("args %p.\n", args);
+
+    if ((status = find_model_by_locale(params->locale, &model)))
+        return status;
+
+    if (!(recognizer = vosk_recognizer_new(model, params->sample_rate)))
+        goto error;
+
+    /* The model is kept alive inside the recognizer, so we can safely free our ref here. */
+    vosk_model_free(model);
+
+    params->vosk_handle = vosk_recognizer_to_handle(recognizer);
+    return STATUS_SUCCESS;
+
+error:
+    if (model) vosk_model_free(model);
+    return STATUS_UNSUCCESSFUL;
+}
+
+/* Unix call: free the recognizer referenced by params->vosk_handle. */
+static NTSTATUS vosk_release( void *args )
+{
+    struct vosk_release_params *params = args;
+
+    TRACE("args %p.\n", args);
+
+    vosk_recognizer_free(vosk_recognizer_from_handle(params->vosk_handle));
+
+    return STATUS_SUCCESS;
+}
+
+#else /* SONAME_LIBVOSK */
+
+/* Stubs used when SONAME_LIBVOSK is not defined: every call returns STATUS_NOT_SUPPORTED. */
+#define MAKE_UNSUPPORTED_FUNC( f ) \
+    static NTSTATUS f( void *args ) \
+    { \
+        WARN("wine was compiled without Vosk support. Speech recognition won't work.\n"); \
+        return STATUS_NOT_SUPPORTED; \
+    }
+
+MAKE_UNSUPPORTED_FUNC(vosk_create)
+MAKE_UNSUPPORTED_FUNC(vosk_release)
+#undef MAKE_UNSUPPORTED_FUNC
+
+#endif /* SONAME_LIBVOSK */
+
+/* Entry points, listed in the same order as enum unix_funcs in unixlib.h. */
+unixlib_entry_t __wine_unix_call_funcs[] =
+{
+    vosk_create,
+    vosk_release,
+};
+
+unixlib_entry_t __wine_unix_call_wow64_funcs[] =
+{
+    vosk_create,
+    vosk_release,
+};