First relevant commit: windows.media.speech: Add a worker thread to the recognition session.
-- v13: windows.media.speech: Store recorded audio in a temporary ringbuffer. windows.media.speech: Partially implement the speech recognizer state. windows.media.speech: Add an audio capturing system.
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/private.h | 1 + dlls/windows.media.speech/recognizer.c | 132 +++++++++++++++++++++-- dlls/windows.media.speech/tests/speech.c | 8 +- 3 files changed, 131 insertions(+), 10 deletions(-)
diff --git a/dlls/windows.media.speech/private.h b/dlls/windows.media.speech/private.h index 41f7b02e3de..e80d73ec1fb 100644 --- a/dlls/windows.media.speech/private.h +++ b/dlls/windows.media.speech/private.h @@ -23,6 +23,7 @@ #include <stdarg.h>
#define COBJMACROS +#include "corerror.h" #include "windef.h" #include "winbase.h" #include "winstring.h" diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 54a0e165f5f..aabfd56694a 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -160,6 +160,10 @@ struct session
struct list completed_handlers; struct list result_handlers; + + HANDLE worker_thread, worker_control_event; + BOOLEAN worker_running; + CRITICAL_SECTION cs; };
/* @@ -173,6 +177,31 @@ static inline struct session *impl_from_ISpeechContinuousRecognitionSession( ISp return CONTAINING_RECORD(iface, struct session, ISpeechContinuousRecognitionSession_iface); }
+static DWORD CALLBACK session_worker_thread_cb( void *args ) +{ + ISpeechContinuousRecognitionSession *iface = args; + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + BOOLEAN running = TRUE; + DWORD status; + + SetThreadDescription(GetCurrentThread(), L"wine_speech_recognition_session_worker"); + + while (running) + { + status = WaitForMultipleObjects(1, &impl->worker_control_event, FALSE, INFINITE); + if (status == 0) /* worker_control_event signaled */ + { + EnterCriticalSection(&impl->cs); + running = impl->worker_running; + LeaveCriticalSection(&impl->cs); + } + + /* TODO: Send mic data to recognizer and handle results. */ + } + + return 0; +} + static HRESULT WINAPI session_QueryInterface( ISpeechContinuousRecognitionSession *iface, REFIID iid, void **out ) { struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); @@ -208,8 +237,24 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface
if (!ref) { + HANDLE thread; + + EnterCriticalSection(&impl->cs); + thread = impl->worker_thread; + impl->worker_running = FALSE; + impl->worker_thread = INVALID_HANDLE_VALUE; + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); + typed_event_handlers_clear(&impl->completed_handlers); typed_event_handlers_clear(&impl->result_handlers); + + impl->cs.DebugInfo->Spare[0] = 0; + DeleteCriticalSection(&impl->cs); + IVector_ISpeechRecognitionConstraint_Release(impl->constraints); free(impl); } @@ -254,8 +299,37 @@ static HRESULT session_start_async( IInspectable *invoker )
static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_start_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + HRESULT hr; + + TRACE("iface %p, action %p.\n", iface, action); + + if (FAILED(hr = async_action_create(NULL, session_start_async, action))) + return hr; + + EnterCriticalSection(&impl->cs); + if (impl->worker_running || impl->worker_thread) + { + hr = COR_E_INVALIDOPERATION; + } + else if (!(impl->worker_thread = CreateThread(NULL, 0, session_worker_thread_cb, impl, 0, NULL))) + { + hr = HRESULT_FROM_WIN32(GetLastError()); + impl->worker_running = FALSE; + } + else + { + impl->worker_running = TRUE; + } + LeaveCriticalSection(&impl->cs); + + if (FAILED(hr)) + { + IAsyncAction_Release(*action); + *action = NULL; + } + + return hr; }
static HRESULT WINAPI session_StartWithModeAsync( ISpeechContinuousRecognitionSession *iface, @@ -273,8 +347,45 @@ static HRESULT session_stop_async( IInspectable *invoker )
static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_stop_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + HANDLE thread; + HRESULT hr; + + TRACE("iface %p, action %p.\n", iface, action); + + if (FAILED(hr = async_action_create(NULL, session_stop_async, action))) + return hr; + + EnterCriticalSection(&impl->cs); + if (impl->worker_running && impl->worker_thread) + { + thread = impl->worker_thread; + impl->worker_thread = INVALID_HANDLE_VALUE; + impl->worker_running = FALSE; + } + else + { + hr = COR_E_INVALIDOPERATION; + } + LeaveCriticalSection(&impl->cs); + + if (SUCCEEDED(hr)) + { + SetEvent(impl->worker_control_event); + WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); + + EnterCriticalSection(&impl->cs); + impl->worker_thread = NULL; + LeaveCriticalSection(&impl->cs); + } + else + { + IAsyncAction_Release(*action); + *action = NULL; + } + + return hr; }
static HRESULT WINAPI session_CancelAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) @@ -818,9 +929,18 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface list_init(&session->completed_handlers); list_init(&session->result_handlers);
+ if (!(session->worker_control_event = CreateEventW(NULL, FALSE, FALSE, NULL))) + { + hr = HRESULT_FROM_WIN32(GetLastError()); + goto error; + } + if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&session->constraints))) goto error;
+ InitializeCriticalSection(&session->cs); + session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs"); + /* Init ISpeechRecognizer */ impl->ISpeechRecognizer_iface.lpVtbl = &speech_recognizer_vtbl; impl->IClosable_iface.lpVtbl = &closable_vtbl; @@ -828,13 +948,13 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface impl->session = &session->ISpeechContinuousRecognitionSession_iface; impl->ref = 1;
- TRACE("created SpeechRecognizer %p.\n", impl); - *speechrecognizer = &impl->ISpeechRecognizer_iface; + TRACE("created SpeechRecognizer %p.\n", *speechrecognizer); return S_OK;
error: if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints); + if (session->worker_control_event) CloseHandle(session->worker_control_event); free(session); free(impl);
diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index a8ed8cff1e7..8b31031d3c5 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1744,8 +1744,8 @@ static void test_Recognition(void)
action2 = (void *)0xdeadbeef; hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action2); - todo_wine ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); - todo_wine ok(action2 == NULL, "action2 was %p.\n", action2); + ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); + ok(action2 == NULL, "action2 was %p.\n", action2);
hr = IAsyncAction_QueryInterface(action, &IID_IAsyncInfo, (void **)&info); ok(hr == S_OK, "IAsyncAction_QueryInterface failed, hr %#lx.\n", hr); @@ -1863,8 +1863,8 @@ static void test_Recognition(void)
/* Try stopping, when already stopped. */ hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); - todo_wine ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); - todo_wine ok(action == NULL, "action was %p.\n", action); + ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); + ok(action == NULL, "action was %p.\n", action);
hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_remove_ResultGenerated failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/tests/speech.c | 50 ++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-)
diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 8b31031d3c5..c358aa70927 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1787,8 +1787,8 @@ static void test_Recognition(void) recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ - broken(recog_state == SpeechRecognizerState_Capturing) , "recog_state was %u.\n", recog_state); + todo_wine ok(recog_state == SpeechRecognizerState_Paused || + broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state);
/* Check what happens if we try to pause again, when the session is already paused. */ hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action2); @@ -1844,7 +1844,7 @@ static void test_Recognition(void)
set = SetEvent(action_handler.event_block); ok(set == TRUE, "Event 'event_block' wasn't set.\n"); - ok(!WaitForSingleObject(put_thread , 1000), "Wait for put_thread failed.\n"); + ok(!WaitForSingleObject(put_thread, 1000), "Wait for put_thread failed.\n"); IAsyncInfo_Release(info);
CloseHandle(action_handler.event_finished); @@ -1866,6 +1866,50 @@ static void test_Recognition(void) ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); ok(action == NULL, "action was %p.\n", action);
+ /* Test, if Start/StopAsync resets the pause state. */ + hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Paused || + broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state); + + hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + + hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Capturing + || broken(recog_state == SpeechRecognizerState_Idle) /* Sometimes Windows is a little behind. */, + "recog_state was %u.\n", recog_state); + + hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_remove_ResultGenerated failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 41 +++++++++++++++++++++--- dlls/windows.media.speech/tests/speech.c | 4 +-- 2 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index aabfd56694a..80cf40fbef9 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -162,7 +162,7 @@ struct session struct list result_handlers;
HANDLE worker_thread, worker_control_event; - BOOLEAN worker_running; + BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; };
@@ -362,6 +362,7 @@ static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *if thread = impl->worker_thread; impl->worker_thread = INVALID_HANDLE_VALUE; impl->worker_running = FALSE; + impl->worker_paused = FALSE; } else { @@ -401,14 +402,44 @@ static HRESULT session_pause_async( IInspectable *invoker )
static HRESULT WINAPI session_PauseAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_pause_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + HRESULT hr = S_OK; + + TRACE("iface %p, action %p.\n", iface, action); + + *action = NULL; + + if (FAILED(hr = async_action_create(NULL, session_pause_async, action))) + return hr; + + EnterCriticalSection(&impl->cs); + if (impl->worker_running) + { + impl->worker_paused = TRUE; + } + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + + return hr; }
static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface ) { - FIXME("iface %p stub!\n", iface); - return E_NOTIMPL; + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + + TRACE("iface %p.\n", iface); + + EnterCriticalSection(&impl->cs); + if (impl->worker_running) + { + impl->worker_paused = FALSE; + } + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + + return S_OK; }
static HRESULT WINAPI session_add_Completed( ISpeechContinuousRecognitionSession *iface, diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index c358aa70927..8037b0e8354 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1798,11 +1798,11 @@ static void test_Recognition(void) IAsyncAction_Release(action2);
hr = ISpeechContinuousRecognitionSession_Resume(session); - todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr);
/* Resume when already resumed. */ hr = ISpeechContinuousRecognitionSession_Resume(session); - todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr);
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 124 ++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 5 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 80cf40fbef9..0def2fc3cbd 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -19,6 +19,10 @@
#include "private.h"
+#include "initguid.h" +#include "audioclient.h" +#include "mmdeviceapi.h" + #include "wine/debug.h"
WINE_DEFAULT_DEBUG_CHANNEL(speech); @@ -161,7 +165,11 @@ struct session struct list completed_handlers; struct list result_handlers;
- HANDLE worker_thread, worker_control_event; + IAudioClient *audio_client; + IAudioCaptureClient *capture_client; + WAVEFORMATEX capture_wfx; + + HANDLE worker_thread, worker_control_event, audio_buf_event; BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; }; @@ -181,25 +189,73 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) { ISpeechContinuousRecognitionSession *iface = args; struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); - BOOLEAN running = TRUE; - DWORD status; + BOOLEAN running = TRUE, paused = FALSE; + DWORD flags, status; + HANDLE events[2]; + BYTE *audio_buf; + HRESULT hr;
SetThreadDescription(GetCurrentThread(), L"wine_speech_recognition_session_worker");
+ if (FAILED(hr = IAudioClient_Start(impl->audio_client))) + goto error; + while (running) { - status = WaitForMultipleObjects(1, &impl->worker_control_event, FALSE, INFINITE); + BOOLEAN old_paused = paused; + UINT32 count = 0; + + events[count++] = impl->worker_control_event; + if (!paused) events[count++] = impl->audio_buf_event; + + status = WaitForMultipleObjects(count, events, FALSE, INFINITE); if (status == 0) /* worker_control_event signaled */ { EnterCriticalSection(&impl->cs); + paused = impl->worker_paused; running = impl->worker_running; LeaveCriticalSection(&impl->cs); + + if (old_paused < paused) + { + if (FAILED(hr = IAudioClient_Stop(impl->audio_client))) goto error; + if (FAILED(hr = IAudioClient_Reset(impl->audio_client))) goto error; + TRACE("session worker paused.\n"); + } + else if (old_paused > paused) + { + if (FAILED(hr = IAudioClient_Start(impl->audio_client))) goto error; + TRACE("session worker resumed.\n"); + } } + else if (status == 1) /* audio_buf_event signaled */ + { + UINT32 frames_available = 0;
- /* TODO: Send mic data to recognizer and handle results. */ + while (IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK) + { + /* TODO: Send mic data to recognizer and handle results. */ + IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available); + } + } + else + { + ERR("Unexpected state entered. Aborting worker.\n"); + break; + } }
+ if (FAILED(hr = IAudioClient_Stop(impl->audio_client))) + ERR("IAudioClient_Stop failed with %#lx.\n", hr); + + if (FAILED(hr = IAudioClient_Reset(impl->audio_client))) + ERR("IAudioClient_Reset failed with %#lx.\n", hr); + return 0; + +error: + ERR("The recognition session worker encountered a serious error and needs to stop. hr: %lx.\n", hr); + return 1; }
static HRESULT WINAPI session_QueryInterface( ISpeechContinuousRecognitionSession *iface, REFIID iid, void **out ) @@ -252,6 +308,9 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface typed_event_handlers_clear(&impl->completed_handlers); typed_event_handlers_clear(&impl->result_handlers);
+ IAudioCaptureClient_Release(impl->capture_client); + IAudioClient_Release(impl->audio_client); + impl->cs.DebugInfo->Spare[0] = 0; DeleteCriticalSection(&impl->cs);
@@ -926,6 +985,55 @@ static const struct IActivationFactoryVtbl activation_factory_vtbl =
DEFINE_IINSPECTABLE(recognizer_factory, ISpeechRecognizerFactory, struct recognizer_statics, IActivationFactory_iface)
+static HRESULT recognizer_factory_create_audio_capture(struct session *session) +{ + const REFERENCE_TIME buffer_duration = 5000000; /* 0.5 second */ + IMMDeviceEnumerator *mm_enum = NULL; + IMMDevice *mm_device = NULL; + WAVEFORMATEX wfx = { 0 }; + WCHAR *str = NULL; + HRESULT hr = S_OK; + + if (!(session->audio_buf_event = CreateEventW(NULL, FALSE, FALSE, NULL))) + return HRESULT_FROM_WIN32(GetLastError()); + + if (FAILED(hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_INPROC_SERVER, &IID_IMMDeviceEnumerator, (void **)&mm_enum))) + goto cleanup; + + if (FAILED(hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint(mm_enum, eCapture, eMultimedia, &mm_device))) + goto cleanup; + + if (FAILED(hr = IMMDevice_Activate(mm_device, &IID_IAudioClient, CLSCTX_INPROC_SERVER, NULL, (void **)&session->audio_client))) + goto cleanup; + + hr = IMMDevice_GetId(mm_device, &str); + TRACE("selected capture device ID: %s, hr %#lx\n", debugstr_w(str), hr); + + wfx.wFormatTag = WAVE_FORMAT_PCM; + wfx.nSamplesPerSec = 16000; + wfx.nChannels = 1; + wfx.wBitsPerSample = 16; + wfx.nBlockAlign = (wfx.wBitsPerSample + 7) / 8 * wfx.nChannels; + wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign; + TRACE("wfx tag %u, channels %u, samples %lu, bits %u, align %u.\n", wfx.wFormatTag, wfx.nChannels, wfx.nSamplesPerSec, wfx.wBitsPerSample, wfx.nBlockAlign); + + if (FAILED(hr = IAudioClient_Initialize(session->audio_client, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, buffer_duration, 0, &wfx, NULL))) + goto cleanup; + + if (FAILED(hr = IAudioClient_SetEventHandle(session->audio_client, session->audio_buf_event))) + goto cleanup; + + hr = IAudioClient_GetService(session->audio_client, &IID_IAudioCaptureClient, (void **)&session->capture_client); + + session->capture_wfx = wfx; + +cleanup: + if (mm_device) IMMDevice_Release(mm_device); + if (mm_enum) IMMDeviceEnumerator_Release(mm_enum); + CoTaskMemFree(str); + return hr; +} + static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface, ILanguage *language, ISpeechRecognizer **speechrecognizer ) { struct recognizer *impl; @@ -969,6 +1077,9 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&session->constraints))) goto error;
+ if (FAILED(hr = recognizer_factory_create_audio_capture(session))) + goto error; + InitializeCriticalSection(&session->cs); session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs");
@@ -984,6 +1095,9 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface return S_OK;
error: + if (session->capture_client) IAudioCaptureClient_Release(session->capture_client); + if (session->audio_client) IAudioClient_Release(session->audio_client); + if (session->audio_buf_event) CloseHandle(session->audio_buf_event); if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints); if (session->worker_control_event) CloseHandle(session->worker_control_event); free(session);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 21 +++++++++++-- dlls/windows.media.speech/tests/speech.c | 40 ++++++++++++------------ 2 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 0def2fc3cbd..72c08a1b06c 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -162,6 +162,8 @@ struct session
IVector_ISpeechRecognitionConstraint *constraints;
+ SpeechRecognizerState recognizer_state; + struct list completed_handlers; struct list result_handlers;
@@ -379,6 +381,7 @@ static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *i else { impl->worker_running = TRUE; + impl->recognizer_state = SpeechRecognizerState_Capturing; } LeaveCriticalSection(&impl->cs);
@@ -422,6 +425,7 @@ static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *if impl->worker_thread = INVALID_HANDLE_VALUE; impl->worker_running = FALSE; impl->worker_paused = FALSE; + impl->recognizer_state = SpeechRecognizerState_Idle; } else { @@ -475,6 +479,7 @@ static HRESULT WINAPI session_PauseAsync( ISpeechContinuousRecognitionSession *i if (impl->worker_running) { impl->worker_paused = TRUE; + impl->recognizer_state = SpeechRecognizerState_Paused; } LeaveCriticalSection(&impl->cs);
@@ -493,6 +498,7 @@ static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface if (impl->worker_running) { impl->worker_paused = FALSE; + impl->recognizer_state = SpeechRecognizerState_Capturing; } LeaveCriticalSection(&impl->cs);
@@ -816,8 +822,19 @@ static HRESULT WINAPI recognizer2_get_ContinuousRecognitionSession( ISpeechRecog
static HRESULT WINAPI recognizer2_get_State( ISpeechRecognizer2 *iface, SpeechRecognizerState *state ) { - FIXME("iface %p, state %p stub!\n", iface, state); - return E_NOTIMPL; + struct recognizer *impl = impl_from_ISpeechRecognizer2(iface); + struct session *session = impl_from_ISpeechContinuousRecognitionSession(impl->session); + + FIXME("iface %p, state %p not all states are supported, yet.\n", iface, state); + + if (!state) + return E_POINTER; + + EnterCriticalSection(&session->cs); + *state = session->recognizer_state; + LeaveCriticalSection(&session->cs); + + return S_OK; }
static HRESULT WINAPI recognizer2_StopRecognitionAsync( ISpeechRecognizer2 *iface, IAsyncAction **action ) diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 8037b0e8354..6bc5a8b1751 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1721,8 +1721,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
hr = ISpeechRecognizer_CompileConstraintsAsync(recognizer, &operation); ok(hr == S_OK, "ISpeechRecognizer_CompileConstraintsAsync failed, hr %#lx.\n", hr); @@ -1771,8 +1771,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state);
/* * TODO: Use a loopback device together with prerecorded audio files to test the recognizer's functionality. @@ -1786,9 +1786,9 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Paused || - broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Paused || + broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state);
/* Check what happens if we try to pause again, when the session is already paused. */ hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action2); @@ -1806,8 +1806,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action2); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); @@ -1858,8 +1858,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
/* Try stopping, when already stopped. */ hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); @@ -1879,9 +1879,9 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Paused || - broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Paused || + broken(recog_state == SpeechRecognizerState_Capturing) /* Broken on Win10 1507 */, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); @@ -1890,8 +1890,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); @@ -1900,10 +1900,10 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Capturing - || broken(recog_state == SpeechRecognizerState_Idle) /* Sometimes Windows is a little behind. */, - "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Capturing + || broken(recog_state == SpeechRecognizerState_Idle) /* Sometimes Windows is a little behind. */, + "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 34 +++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 72c08a1b06c..c2f386206b8 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -192,9 +192,10 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) ISpeechContinuousRecognitionSession *iface = args; struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); BOOLEAN running = TRUE, paused = FALSE; + UINT32 frame_count, tmp_buf_size; + BYTE *audio_buf, *tmp_buf; DWORD flags, status; HANDLE events[2]; - BYTE *audio_buf; HRESULT hr;
SetThreadDescription(GetCurrentThread(), L"wine_speech_recognition_session_worker"); @@ -202,6 +203,16 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) if (FAILED(hr = IAudioClient_Start(impl->audio_client))) goto error;
+ if (FAILED(hr = IAudioClient_GetBufferSize(impl->audio_client, &frame_count))) + goto error; + + tmp_buf_size = sizeof(*tmp_buf) * frame_count * impl->capture_wfx.nBlockAlign; + if (!(tmp_buf = malloc(tmp_buf_size))) + { + ERR("Memory allocation failed.\n"); + return 1; + } + while (running) { BOOLEAN old_paused = paused; @@ -232,13 +243,28 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) } else if (status == 1) /* audio_buf_event signaled */ { + SIZE_T packet_size = 0, tmp_buf_offset = 0; UINT32 frames_available = 0;
- while (IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK) + while (tmp_buf_offset < tmp_buf_size + && IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK) { - /* TODO: Send mic data to recognizer and handle results. */ + packet_size = frames_available * impl->capture_wfx.nBlockAlign; + if (tmp_buf_offset + packet_size > tmp_buf_size) + { + /* Defer processing until the next iteration of the worker loop. */ + IAudioCaptureClient_ReleaseBuffer(impl->capture_client, 0); + SetEvent(impl->audio_buf_event); + break; + } + + memcpy(tmp_buf + tmp_buf_offset, audio_buf, packet_size); + tmp_buf_offset += packet_size; + IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available); } + + /* TODO: Send mic data to recognizer and handle results. */ } else { @@ -253,6 +279,8 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) if (FAILED(hr = IAudioClient_Reset(impl->audio_client))) ERR("IAudioClient_Reset failed with %#lx.\n", hr);
+ free(tmp_buf); + return 0;
error:
This merge request was approved by Rémi Bernon.