First relevant commit: windows.media.speech: Add a worker thread to the recognition session.
-- v10:
- windows.media.speech: Store recorded audio in a temporary ringbuffer.
- windows.media.speech: Partially implement the speech recognizer state.
- windows.media.speech: Add an audio capturing system.
- windows.media.speech: Allow the recognition session worker to be paused.
- windows.media.speech/tests: Check if stopping the session resets the paused state.
- windows.media.speech: Add a worker thread to the recognition session.
From: Bernhard Kölbl <besentv@gmail.com>
Signed-off-by: Bernhard Kölbl <besentv@gmail.com> --- dlls/windows.media.speech/private.h | 1 + dlls/windows.media.speech/recognizer.c | 129 ++++++++++++++++++++++- dlls/windows.media.speech/tests/speech.c | 8 +- 3 files changed, 130 insertions(+), 8 deletions(-)
diff --git a/dlls/windows.media.speech/private.h b/dlls/windows.media.speech/private.h index 41f7b02e3de..e80d73ec1fb 100644 --- a/dlls/windows.media.speech/private.h +++ b/dlls/windows.media.speech/private.h @@ -23,6 +23,7 @@ #include <stdarg.h>
#define COBJMACROS +#include "corerror.h" #include "windef.h" #include "winbase.h" #include "winstring.h" diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 54a0e165f5f..f5ee2e1a70c 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -160,6 +160,10 @@ struct session
struct list completed_handlers; struct list result_handlers; + + HANDLE worker_thread, worker_control_event; + BOOLEAN worker_running; + CRITICAL_SECTION cs; };
/* @@ -173,6 +177,31 @@ static inline struct session *impl_from_ISpeechContinuousRecognitionSession( ISp return CONTAINING_RECORD(iface, struct session, ISpeechContinuousRecognitionSession_iface); }
+static DWORD CALLBACK session_worker_thread_cb( void *args ) +{ + ISpeechContinuousRecognitionSession *iface = args; + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + BOOLEAN running = TRUE; + DWORD status; + + SetThreadDescription(GetCurrentThread(), L"wine_speech_recognition_session_worker"); + + while (running) + { + status = WaitForMultipleObjects(1, &impl->worker_control_event, FALSE, INFINITE); + if (status == 0) /* worker_control_event signaled */ + { + EnterCriticalSection(&impl->cs); + running = impl->worker_running; + LeaveCriticalSection(&impl->cs); + } + + /* TODO: Send mic data to recognizer and handle results. */ + } + + return 0; +} + static HRESULT WINAPI session_QueryInterface( ISpeechContinuousRecognitionSession *iface, REFIID iid, void **out ) { struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); @@ -208,8 +237,24 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface
if (!ref) { + HANDLE thread; + + EnterCriticalSection(&impl->cs); + thread = impl->worker_thread; + impl->worker_running = FALSE; + impl->worker_thread = INVALID_HANDLE_VALUE; + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); + typed_event_handlers_clear(&impl->completed_handlers); typed_event_handlers_clear(&impl->result_handlers); + + impl->cs.DebugInfo->Spare[0] = 0; + DeleteCriticalSection(&impl->cs); + IVector_ISpeechRecognitionConstraint_Release(impl->constraints); free(impl); } @@ -254,8 +299,37 @@ static HRESULT session_start_async( IInspectable *invoker )
static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_start_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + HRESULT hr; + + TRACE("iface %p, action %p.\n", iface, action); + + if (FAILED(hr = async_action_create(NULL, session_start_async, action))) + return hr; + + EnterCriticalSection(&impl->cs); + if (impl->worker_running || impl->worker_thread) + { + hr = COR_E_INVALIDOPERATION; + } + else if (!(impl->worker_thread = CreateThread(NULL, 0, session_worker_thread_cb, impl, 0, NULL))) + { + hr = HRESULT_FROM_WIN32(GetLastError()); + impl->worker_running = FALSE; + } + else + { + impl->worker_running = TRUE; + } + LeaveCriticalSection(&impl->cs); + + if (FAILED(hr)) + { + IAsyncAction_Release(*action); + *action = NULL; + } + + return hr; }
static HRESULT WINAPI session_StartWithModeAsync( ISpeechContinuousRecognitionSession *iface, @@ -273,8 +347,45 @@ static HRESULT session_stop_async( IInspectable *invoker )
static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_stop_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + HANDLE thread; + HRESULT hr; + + TRACE("iface %p, action %p.\n", iface, action); + + if (FAILED(hr = async_action_create(NULL, session_stop_async, action))) + return hr; + + EnterCriticalSection(&impl->cs); + if (impl->worker_running && impl->worker_thread) + { + thread = impl->worker_thread; + impl->worker_thread = INVALID_HANDLE_VALUE; + impl->worker_running = FALSE; + } + else + { + hr = COR_E_INVALIDOPERATION; + } + LeaveCriticalSection(&impl->cs); + + if (SUCCEEDED(hr)) + { + SetEvent(impl->worker_control_event); + WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); + + EnterCriticalSection(&impl->cs); + impl->worker_thread = NULL; + LeaveCriticalSection(&impl->cs); + } + else + { + IAsyncAction_Release(*action); + *action = NULL; + } + + return hr; }
static HRESULT WINAPI session_CancelAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) @@ -818,9 +929,18 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface list_init(&session->completed_handlers); list_init(&session->result_handlers);
+ if (!(session->worker_control_event = CreateEventW(NULL, FALSE, FALSE, NULL))) + { + hr = HRESULT_FROM_WIN32(GetLastError()); + goto error; + } + if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&session->constraints))) goto error;
+ InitializeCriticalSection(&session->cs); + session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs"); + /* Init ISpeechRecognizer */ impl->ISpeechRecognizer_iface.lpVtbl = &speech_recognizer_vtbl; impl->IClosable_iface.lpVtbl = &closable_vtbl; @@ -835,6 +955,7 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface
error: if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints); + CloseHandle(session->worker_control_event); free(session); free(impl);
diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index a8ed8cff1e7..8b31031d3c5 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1744,8 +1744,8 @@ static void test_Recognition(void)
action2 = (void *)0xdeadbeef; hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action2); - todo_wine ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); - todo_wine ok(action2 == NULL, "action2 was %p.\n", action2); + ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); + ok(action2 == NULL, "action2 was %p.\n", action2);
hr = IAsyncAction_QueryInterface(action, &IID_IAsyncInfo, (void **)&info); ok(hr == S_OK, "IAsyncAction_QueryInterface failed, hr %#lx.\n", hr); @@ -1863,8 +1863,8 @@ static void test_Recognition(void)
/* Try stopping, when already stopped. */ hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); - todo_wine ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); - todo_wine ok(action == NULL, "action was %p.\n", action); + ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); + ok(action == NULL, "action was %p.\n", action);
hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_remove_ResultGenerated failed, hr %#lx.\n", hr);
From: Bernhard Kölbl <besentv@gmail.com>
Signed-off-by: Bernhard Kölbl <besentv@gmail.com> --- dlls/windows.media.speech/tests/speech.c | 48 ++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-)
diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 8b31031d3c5..2b3d0f53fee 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1787,8 +1787,8 @@ static void test_Recognition(void) recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ - broken(recog_state == SpeechRecognizerState_Capturing) , "recog_state was %u.\n", recog_state); + todo_wine ok(recog_state == SpeechRecognizerState_Paused || + broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state);
/* Check what happens if we try to pause again, when the session is already paused. */ hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action2); @@ -1844,7 +1844,7 @@ static void test_Recognition(void)
set = SetEvent(action_handler.event_block); ok(set == TRUE, "Event 'event_block' wasn't set.\n"); - ok(!WaitForSingleObject(put_thread , 1000), "Wait for put_thread failed.\n"); + ok(!WaitForSingleObject(put_thread, 1000), "Wait for put_thread failed.\n"); IAsyncInfo_Release(info);
CloseHandle(action_handler.event_finished); @@ -1866,6 +1866,48 @@ static void test_Recognition(void) ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); ok(action == NULL, "action was %p.\n", action);
+ /* Test, if Start/StopAsync resets the pause state. */ + hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Paused || + broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */ , "recog_state was %u.\n", recog_state); + + hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + + hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + + hr = 
ISpeechContinuousRecognitionSession_StopAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_remove_ResultGenerated failed, hr %#lx.\n", hr);
From: Bernhard Kölbl <besentv@gmail.com>
Signed-off-by: Bernhard Kölbl <besentv@gmail.com> --- dlls/windows.media.speech/recognizer.c | 41 +++++++++++++++++++++--- dlls/windows.media.speech/tests/speech.c | 4 +-- 2 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index f5ee2e1a70c..4abe84b7d39 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -162,7 +162,7 @@ struct session struct list result_handlers;
HANDLE worker_thread, worker_control_event; - BOOLEAN worker_running; + BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; };
@@ -362,6 +362,7 @@ static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *if thread = impl->worker_thread; impl->worker_thread = INVALID_HANDLE_VALUE; impl->worker_running = FALSE; + impl->worker_paused = FALSE; } else { @@ -401,14 +402,44 @@ static HRESULT session_pause_async( IInspectable *invoker )
static HRESULT WINAPI session_PauseAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_pause_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + HRESULT hr = S_OK; + + TRACE("iface %p, action %p.\n", iface, action); + + *action = NULL; + + if (FAILED(hr = async_action_create(NULL, session_pause_async, action))) + return hr; + + EnterCriticalSection(&impl->cs); + if (impl->worker_running) + { + impl->worker_paused = TRUE; + } + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + + return hr; }
static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface ) { - FIXME("iface %p stub!\n", iface); - return E_NOTIMPL; + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + + TRACE("iface %p.\n", iface); + + EnterCriticalSection(&impl->cs); + if (impl->worker_running) + { + impl->worker_paused = FALSE; + } + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + + return S_OK; }
static HRESULT WINAPI session_add_Completed( ISpeechContinuousRecognitionSession *iface, diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 2b3d0f53fee..46e97b7221f 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1798,11 +1798,11 @@ static void test_Recognition(void) IAsyncAction_Release(action2);
hr = ISpeechContinuousRecognitionSession_Resume(session); - todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr);
/* Resume when already resumed. */ hr = ISpeechContinuousRecognitionSession_Resume(session); - todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr);
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state);
From: Bernhard Kölbl <besentv@gmail.com>
Signed-off-by: Bernhard Kölbl <besentv@gmail.com> --- dlls/windows.media.speech/recognizer.c | 120 +++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 5 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 4abe84b7d39..d45cf03be19 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -19,6 +19,10 @@
#include "private.h"
+#include "initguid.h" +#include "audioclient.h" +#include "mmdeviceapi.h" + #include "wine/debug.h"
WINE_DEFAULT_DEBUG_CHANNEL(speech); @@ -161,7 +165,11 @@ struct session struct list completed_handlers; struct list result_handlers;
- HANDLE worker_thread, worker_control_event; + IAudioClient *audio_client; + IAudioCaptureClient *capture_client; + WAVEFORMATEX capture_wfx; + + HANDLE worker_thread, worker_control_event, audio_buf_event; BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; }; @@ -181,24 +189,64 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) { ISpeechContinuousRecognitionSession *iface = args; struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); - BOOLEAN running = TRUE; - DWORD status; + BOOLEAN running = TRUE, paused = FALSE; + DWORD flags, status; + UINT32 frame_count; + HANDLE events[2]; + BYTE *audio_buf;
SetThreadDescription(GetCurrentThread(), L"wine_speech_recognition_session_worker");
+ IAudioClient_Start(impl->audio_client); + IAudioClient_GetBufferSize(impl->audio_client, &frame_count); + while (running) { - status = WaitForMultipleObjects(1, &impl->worker_control_event, FALSE, INFINITE); + BOOLEAN old_paused = paused; + UINT32 count = 0; + + events[count++] = impl->worker_control_event; + if (!paused) events[count++] = impl->audio_buf_event; + + status = WaitForMultipleObjects(count, events, FALSE, INFINITE); if (status == 0) /* worker_control_event signaled */ { EnterCriticalSection(&impl->cs); + paused = impl->worker_paused; running = impl->worker_running; LeaveCriticalSection(&impl->cs); + + if (old_paused < paused) + { + IAudioClient_Stop(impl->audio_client); + IAudioClient_Reset(impl->audio_client); + TRACE("session worker paused.\n"); + } + else if (old_paused > paused) + { + IAudioClient_Start(impl->audio_client); + TRACE("session worker resumed.\n"); + } } + else if (status == 1) /* audio_buf_event signaled */ + { + UINT32 frames_available = 0;
- /* TODO: Send mic data to recognizer and handle results. */ + while (IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK) + { + /* TODO: Send mic data to recognizer and handle results. */ + IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available); + } + } + else + { + ERR("Unexpected state entered. Aborting worker!\n"); + break; + } }
+ IAudioClient_Stop(impl->audio_client); + return 0; }
@@ -252,6 +300,9 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface typed_event_handlers_clear(&impl->completed_handlers); typed_event_handlers_clear(&impl->result_handlers);
+ IAudioCaptureClient_Release(impl->capture_client); + IAudioClient_Release(impl->audio_client); + impl->cs.DebugInfo->Spare[0] = 0; DeleteCriticalSection(&impl->cs);
@@ -926,6 +977,60 @@ static const struct IActivationFactoryVtbl activation_factory_vtbl =
DEFINE_IINSPECTABLE(recognizer_factory, ISpeechRecognizerFactory, struct recognizer_statics, IActivationFactory_iface)
+static HRESULT recognizer_factory_create_audio_capture(struct session *session) +{ + const REFERENCE_TIME buffer_duration = 5000000; /* 0.5 second */ + IMMDeviceEnumerator *mm_enum = NULL; + IMMDevice *mm_device = NULL; + WAVEFORMATEX wfx = { 0 }; + WCHAR *str = NULL; + HRESULT hr = S_OK; + + if (!(session->audio_buf_event = CreateEventW(NULL, FALSE, FALSE, NULL))) + return HRESULT_FROM_WIN32(GetLastError()); + + if (FAILED(hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_INPROC_SERVER, &IID_IMMDeviceEnumerator, (void **)&mm_enum))) + goto cleanup; + + if (FAILED(hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint(mm_enum, eCapture, eMultimedia, &mm_device))) + goto cleanup; + + if (FAILED(hr = IMMDevice_Activate(mm_device, &IID_IAudioClient, CLSCTX_INPROC_SERVER, NULL, (void **)&session->audio_client))) + goto cleanup; + + if (SUCCEEDED(hr = IMMDevice_GetId(mm_device, &str))) + TRACE("selected capture device ID: %s\n", debugstr_w(str)); + + wfx.wFormatTag = WAVE_FORMAT_PCM; + wfx.nSamplesPerSec = 16000; + wfx.nChannels = 1; + wfx.wBitsPerSample = 16; + wfx.nBlockAlign = (wfx.wBitsPerSample + 7) / 8 * wfx.nChannels; + wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign; + TRACE("wfx tag %u, channels %u, samples %lu, bits %u, align %u.\n", wfx.wFormatTag, wfx.nChannels, wfx.nSamplesPerSec, wfx.wBitsPerSample, wfx.nBlockAlign); + + if (FAILED(hr = IAudioClient_Initialize(session->audio_client, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, buffer_duration, 0, &wfx, NULL))) + goto cleanup; + + if (FAILED(hr = IAudioClient_SetEventHandle(session->audio_client, session->audio_buf_event))) + goto cleanup; + + hr = IAudioClient_GetService(session->audio_client, &IID_IAudioCaptureClient, (void **)&session->capture_client); + + session->capture_wfx = wfx; + +cleanup: + if (FAILED(hr)) + { + if (session->audio_client) IAudioClient_Release(session->audio_client); + if (session->audio_buf_event) CloseHandle(session->audio_buf_event); + } + if 
(mm_device) IMMDevice_Release(mm_device); + if (mm_enum) IMMDeviceEnumerator_Release(mm_enum); + CoTaskMemFree(str); + return hr; +} + static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface, ILanguage *language, ISpeechRecognizer **speechrecognizer ) { struct recognizer *impl; @@ -969,6 +1074,9 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&session->constraints))) goto error;
+ if (FAILED(hr = recognizer_factory_create_audio_capture(session))) + goto error; + InitializeCriticalSection(&session->cs); session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs");
@@ -985,6 +1093,8 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface return S_OK;
error: + if (session->capture_client) IAudioCaptureClient_Release(session->capture_client); + if (session->audio_client) IAudioClient_Release(session->audio_client); if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints); CloseHandle(session->worker_control_event); free(session);
From: Bernhard Kölbl <besentv@gmail.com>
Signed-off-by: Bernhard Kölbl <besentv@gmail.com> --- dlls/windows.media.speech/recognizer.c | 21 ++++++++++++-- dlls/windows.media.speech/tests/speech.c | 36 ++++++++++++------------ 2 files changed, 37 insertions(+), 20 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index d45cf03be19..6864ae02da8 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -162,6 +162,8 @@ struct session
IVector_ISpeechRecognitionConstraint *constraints;
+ SpeechRecognizerState recognizer_state; + struct list completed_handlers; struct list result_handlers;
@@ -371,6 +373,7 @@ static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *i else { impl->worker_running = TRUE; + impl->recognizer_state = SpeechRecognizerState_Capturing; } LeaveCriticalSection(&impl->cs);
@@ -414,6 +417,7 @@ static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *if impl->worker_thread = INVALID_HANDLE_VALUE; impl->worker_running = FALSE; impl->worker_paused = FALSE; + impl->recognizer_state = SpeechRecognizerState_Idle; } else { @@ -467,6 +471,7 @@ static HRESULT WINAPI session_PauseAsync( ISpeechContinuousRecognitionSession *i if (impl->worker_running) { impl->worker_paused = TRUE; + impl->recognizer_state = SpeechRecognizerState_Paused; } LeaveCriticalSection(&impl->cs);
@@ -485,6 +490,7 @@ static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface if (impl->worker_running) { impl->worker_paused = FALSE; + impl->recognizer_state = SpeechRecognizerState_Capturing; } LeaveCriticalSection(&impl->cs);
@@ -808,8 +814,19 @@ static HRESULT WINAPI recognizer2_get_ContinuousRecognitionSession( ISpeechRecog
static HRESULT WINAPI recognizer2_get_State( ISpeechRecognizer2 *iface, SpeechRecognizerState *state ) { - FIXME("iface %p, state %p stub!\n", iface, state); - return E_NOTIMPL; + struct recognizer *impl = impl_from_ISpeechRecognizer2(iface); + struct session *session = impl_from_ISpeechContinuousRecognitionSession(impl->session); + + FIXME("iface %p, state %p not all states are supported, yet!\n", iface, state); + + if (!state) + return E_POINTER; + + EnterCriticalSection(&session->cs); + *state = session->recognizer_state; + LeaveCriticalSection(&session->cs); + + return S_OK; }
static HRESULT WINAPI recognizer2_StopRecognitionAsync( ISpeechRecognizer2 *iface, IAsyncAction **action ) diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 46e97b7221f..ff26a0a0bb0 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1721,8 +1721,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
hr = ISpeechRecognizer_CompileConstraintsAsync(recognizer, &operation); ok(hr == S_OK, "ISpeechRecognizer_CompileConstraintsAsync failed, hr %#lx.\n", hr); @@ -1771,8 +1771,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state);
/* * TODO: Use a loopback device together with prerecorded audio files to test the recognizer's functionality. @@ -1786,9 +1786,9 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Paused || - broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Paused || + broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state);
/* Check what happens if we try to pause again, when the session is already paused. */ hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action2); @@ -1806,8 +1806,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action2); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); @@ -1858,8 +1858,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
/* Try stopping, when already stopped. */ hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); @@ -1879,9 +1879,9 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Paused || - broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */ , "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Paused || + broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */ , "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); @@ -1890,8 +1890,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); @@ -1900,8 +1900,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 28 +++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 6864ae02da8..cf9428fa7db 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -192,16 +192,19 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) ISpeechContinuousRecognitionSession *iface = args; struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); BOOLEAN running = TRUE, paused = FALSE; + UINT32 frame_count, tmp_buf_size; + BYTE *audio_buf, *tmp_buf; DWORD flags, status; - UINT32 frame_count; HANDLE events[2]; - BYTE *audio_buf;
SetThreadDescription(GetCurrentThread(), L"wine_speech_recognition_session_worker");
IAudioClient_Start(impl->audio_client); IAudioClient_GetBufferSize(impl->audio_client, &frame_count);
+ tmp_buf_size = sizeof(*tmp_buf) * frame_count * impl->capture_wfx.nBlockAlign; + tmp_buf = malloc(tmp_buf_size); + while (running) { BOOLEAN old_paused = paused; @@ -232,13 +235,27 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) } else if (status == 1) /* audio_buf_event signaled */ { - UINT32 frames_available = 0; + UINT32 frames_available = 0, tmp_buf_offset = 0; + SIZE_T packet_size = frames_available * impl->capture_wfx.nBlockAlign;
- while (IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK) + while (tmp_buf_offset < tmp_buf_size + && IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK) { - /* TODO: Send mic data to recognizer and handle results. */ + if (tmp_buf_offset + packet_size > tmp_buf_size) + { + /* Defer processing until the next iteration of the worker loop. */ + IAudioCaptureClient_ReleaseBuffer(impl->capture_client, 0); + SetEvent(impl->audio_buf_event); + break; + } + + memcpy(tmp_buf + tmp_buf_offset, audio_buf, packet_size); + tmp_buf_offset += packet_size; + IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available); } + + /* TODO: Send mic data to recognizer and handle results. */ } else { @@ -248,6 +265,7 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) }
IAudioClient_Stop(impl->audio_client); + free(tmp_buf);
return 0; }
On Tue Jan 24 21:59:43 2023 +0000, Bernhard Kölbl wrote:
Should I really double or triple the size?
Well, if we don't run into constant buffer overflow/underflow, we can keep it as-is.
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
- TRACE("wfx tag %u, channels %u, samples %lu, bits %u, align %u.\n", wfx.wFormatTag, wfx.nChannels, wfx.nSamplesPerSec, wfx.wBitsPerSample, wfx.nBlockAlign);
- if (FAILED(hr = IAudioClient_Initialize(session->audio_client, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, buffer_duration, 0, &wfx, NULL)))
goto cleanup;
- if (FAILED(hr = IAudioClient_SetEventHandle(session->audio_client, session->audio_buf_event)))
goto cleanup;
- hr = IAudioClient_GetService(session->audio_client, &IID_IAudioCaptureClient, (void **)&session->capture_client);
- session->capture_wfx = wfx;
+cleanup:
- if (FAILED(hr))
- {
if (session->audio_client) IAudioClient_Release(session->audio_client);
This causes a double free in `recognizer_factory_Create`.
```suggestion:-0+0 ```
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
}
else if (old_paused > paused)
{
IAudioClient_Start(impl->audio_client);
TRACE("session worker resumed.\n");
}
}
else if (status == 1) /* audio_buf_event signaled */
{
UINT32 frames_available = 0, tmp_buf_offset = 0;
SIZE_T packet_size = frames_available * impl->capture_wfx.nBlockAlign;
while (tmp_buf_offset < tmp_buf_size
&& IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK)
{
if (tmp_buf_offset + packet_size > tmp_buf_size)
Verbose explanation (you're probably already aware of this): `packet_size` is always 0. `packet_size` should be the size of the current packet. The expression `frames_available * impl->capture_wfx.nBlockAlign` should be evaluated only after `frames_available` is assigned a meaningful value.
```suggestion:-0+0 SIZE_T packet_size = frames_available * impl->capture_wfx.nBlockAlign;
if (tmp_buf_offset + packet_size > tmp_buf_size) ```
My guess is that you wanted to move `packet_size` declaration out of the loop, but while doing so, you accidentally took the initializer expression with it as well. In this case, you can say:
```suggestion:-0+0 packet_size = frames_available * impl->capture_wfx.nBlockAlign;
if (tmp_buf_offset + packet_size > tmp_buf_size) ```
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
- HRESULT hr = S_OK;
- if (!(session->audio_buf_event = CreateEventW(NULL, FALSE, FALSE, NULL)))
return HRESULT_FROM_WIN32(GetLastError());
- if (FAILED(hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_INPROC_SERVER, &IID_IMMDeviceEnumerator, (void **)&mm_enum)))
goto cleanup;
- if (FAILED(hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint(mm_enum, eCapture, eMultimedia, &mm_device)))
goto cleanup;
- if (FAILED(hr = IMMDevice_Activate(mm_device, &IID_IAudioClient, CLSCTX_INPROC_SERVER, NULL, (void **)&session->audio_client)))
goto cleanup;
- if (SUCCEEDED(hr = IMMDevice_GetId(mm_device, &str)))
TRACE("selected capture device ID: %s\n", debugstr_w(str));
```suggestion:-1+0 hr = IMMDevice_GetId(mm_device, &str); TRACE("selected capture device ID: %s (HRESULT %#x)\n", debugstr_w(str), hr); ```
It's more helpful to report failure than to silence it, since (1) `TRACE` is usually enabled for debugging purposes and (2) randomly omitted TRACE might cause confusion. `IMMDevice::GetId` returns NULL in `*ppstrId` on failure, and `debugstr_w` reports NULL as `(null)`.
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
- if (FAILED(hr = IAudioClient_Initialize(session->audio_client, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, buffer_duration, 0, &wfx, NULL)))
goto cleanup;
- if (FAILED(hr = IAudioClient_SetEventHandle(session->audio_client, session->audio_buf_event)))
goto cleanup;
- hr = IAudioClient_GetService(session->audio_client, &IID_IAudioCaptureClient, (void **)&session->capture_client);
- session->capture_wfx = wfx;
+cleanup:
- if (FAILED(hr))
- {
if (session->audio_client) IAudioClient_Release(session->audio_client);
if (session->audio_buf_event) CloseHandle(session->audio_buf_event);
I think it's better to consolidate all resource cleanup to the `error:` label inside `recognizer_factory_Create`. Alternatively, we should set it to `NULL` after closing the handle (or releasing the interface). In general, we don't want to leave an object in an inconsistent state if it's not our own (e.g. it's passed by pointer as a parameter).
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
return S_OK;
error:
- if (session->capture_client) IAudioCaptureClient_Release(session->capture_client);
- if (session->audio_client) IAudioClient_Release(session->audio_client); if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints);
- CloseHandle(session->worker_control_event);
Closing a potentially NULL handle is confusing. How about:
```suggestion:-0+0 if (session->worker_control_event) CloseHandle(session->worker_control_event); ```
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
list_init(&session->completed_handlers); list_init(&session->result_handlers);
- if (!(session->worker_control_event = CreateEventW(NULL, FALSE, FALSE, NULL)))
- {
hr = HRESULT_FROM_WIN32(GetLastError());
goto error;
(This is the point where we reach `error` with `session->worker_control_event` set to NULL.)
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
memcpy(tmp_buf + tmp_buf_offset, audio_buf, packet_size);
tmp_buf_offset += packet_size;
IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available);
}
/* TODO: Send mic data to recognizer and handle results. */
}
else
{
ERR("Unexpected state entered. Aborting worker!\n");
break;
}
- }
- IAudioClient_Stop(impl->audio_client);
You should reset `IAudioClient` here as well.
```suggestion:-0+0 IAudioClient_Stop(impl->audio_client); IAudioClient_Reset(impl->audio_client); ```
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/tests/speech.c:
- /* Test, if Start/StopAsync resets the pause state. */
- hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action);
- ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr);
- await_async_void(action, &action_handler);
- IAsyncAction_Release(action);
- hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action);
- ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr);
- await_async_void(action, &action_handler);
- IAsyncAction_Release(action);
- recog_state = 0xdeadbeef;
- hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state);
- ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr);
- ok(recog_state == SpeechRecognizerState_Paused ||
broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */ , "recog_state was %u.\n", recog_state);
Nit: the space is inconsistent with code above.
```suggestion:-0+0 broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state); ```
Sorry for keeping you busy. I hope these are my final review comments. As always, thanks for your hard work on enabling Wine to run more apps!
On Wed Jan 25 12:05:53 2023 +0000, Jinoh Kang wrote:
I think it's better to consolidate all resource cleanup to the `error:` label inside `recognizer_factory_Create`. Alternatively, if we want to keep the `CloseHandle`/`Release` here, we should set the variable to `NULL` after closing the handle (or releasing the interface). In general, we don't want to leave an object in an inconsistent state if we don't own it (e.g. it's passed by pointer as a parameter).
Yeah, I initially had the idea that each of these functions would be self-contained, but that can't possibly work out well.
On Wed Jan 25 12:00:56 2023 +0000, Jinoh Kang wrote:
Verbose explanation (you're probably already aware of this): `packet_size` is always 0. `packet_size` should be the size of the current packet. The expression `frames_available * impl->capture_wfx.nBlockAlign` should be evaluated only after `frames_available` is assigned a meaningful value.
SIZE_T packet_size = frames_available * impl->capture_wfx.nBlockAlign; if (tmp_buf_offset + packet_size > tmp_buf_size)
My guess is that you wanted to move `packet_size` declaration out of the loop, but while doing so, you accidentally took the initializer expression with it as well. In this case, you can say:
packet_size = frames_available * impl->capture_wfx.nBlockAlign; if (tmp_buf_offset + packet_size > tmp_buf_size)
Yeah, that was a copy-paste accident.