First relevant commit: windows.media.speech: Add a worker thread to the recognition session.
-- v5: windows.media.speech: Store recorded audio in a temporary ringbuffer. windows.media.speech: Partially implement the speech recognizer state. windows.media.speech: Add an audio capturing system. windows.media.speech: Allow the recognition session worker to be paused.
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index bdcc57f883e..105ed2b0b05 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -156,6 +156,8 @@ struct session ISpeechContinuousRecognitionSession ISpeechContinuousRecognitionSession_iface; LONG ref;
+ IVector_ISpeechRecognitionConstraint *constraints; + struct list completed_handlers; struct list result_handlers; }; @@ -208,6 +210,7 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface { typed_event_handlers_clear(&impl->completed_handlers); typed_event_handlers_clear(&impl->result_handlers); + IVector_ISpeechRecognitionConstraint_Release(impl->constraints); free(impl); }
@@ -360,7 +363,6 @@ struct recognizer LONG ref;
ISpeechContinuousRecognitionSession *session; - IVector_ISpeechRecognitionConstraint *constraints; };
/* @@ -424,7 +426,6 @@ static ULONG WINAPI recognizer_Release( ISpeechRecognizer *iface ) if (!ref) { ISpeechContinuousRecognitionSession_Release(impl->session); - IVector_ISpeechRecognitionConstraint_Release(impl->constraints); free(impl); }
@@ -452,8 +453,11 @@ static HRESULT WINAPI recognizer_GetTrustLevel( ISpeechRecognizer *iface, TrustL static HRESULT WINAPI recognizer_get_Constraints( ISpeechRecognizer *iface, IVector_ISpeechRecognitionConstraint **vector ) { struct recognizer *impl = impl_from_ISpeechRecognizer(iface); + struct session *session = impl_from_ISpeechContinuousRecognitionSession(impl->session); + TRACE("iface %p, operation %p.\n", iface, vector); - IVector_ISpeechRecognitionConstraint_AddRef((*vector = impl->constraints)); + + IVector_ISpeechRecognitionConstraint_AddRef((*vector = session->constraints)); return S_OK; }
@@ -797,18 +801,22 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface if (language) FIXME("language parameter unused. Stub!\n");
+ /* Init ISpeechContinuousRecognitionSession */ session->ISpeechContinuousRecognitionSession_iface.lpVtbl = &session_vtbl; session->ref = 1; + list_init(&session->completed_handlers); list_init(&session->result_handlers);
+ if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&session->constraints))) + goto error; + + /* Init ISpeechRecognizer */ impl->ISpeechRecognizer_iface.lpVtbl = &speech_recognizer_vtbl; impl->IClosable_iface.lpVtbl = &closable_vtbl; impl->ISpeechRecognizer2_iface.lpVtbl = &speech_recognizer2_vtbl; impl->session = &session->ISpeechContinuousRecognitionSession_iface; impl->ref = 1; - if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&impl->constraints))) - goto error;
TRACE("created SpeechRecognizer %p.\n", impl);
@@ -816,6 +824,7 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface return S_OK;
error: + if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints); free(session); free(impl);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/private.h | 4 ++-- dlls/windows.media.speech/recognizer.c | 8 ++++---- dlls/windows.media.speech/synthesizer.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/dlls/windows.media.speech/private.h b/dlls/windows.media.speech/private.h index 13964329697..41f7b02e3de 100644 --- a/dlls/windows.media.speech/private.h +++ b/dlls/windows.media.speech/private.h @@ -69,8 +69,8 @@ struct vector_iids const GUID *view; };
-typedef HRESULT (WINAPI *async_action_callback)( IInspectable *invoker ); -typedef HRESULT (WINAPI *async_operation_inspectable_callback)( IInspectable *invoker, IInspectable **result ); +typedef HRESULT (*async_action_callback)( IInspectable *invoker ); +typedef HRESULT (*async_operation_inspectable_callback)( IInspectable *invoker, IInspectable **result );
HRESULT async_action_create( IInspectable *invoker, async_action_callback callback, IAsyncAction **out ); HRESULT async_operation_inspectable_create( const GUID *iid, IInspectable *invoker, async_operation_inspectable_callback callback, diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 105ed2b0b05..aaf9c5f8908 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -247,7 +247,7 @@ static HRESULT WINAPI session_set_AutoStopSilenceTimeout( ISpeechContinuousRecog return E_NOTIMPL; }
-static HRESULT WINAPI start_callback( IInspectable *invoker ) +static HRESULT session_start_async( IInspectable *invoker ) { return S_OK; } @@ -255,7 +255,7 @@ static HRESULT WINAPI start_callback( IInspectable *invoker ) static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, start_callback, action); + return async_action_create(NULL, session_start_async, action); }
static HRESULT WINAPI session_StartWithModeAsync( ISpeechContinuousRecognitionSession *iface, @@ -479,7 +479,7 @@ static HRESULT WINAPI recognizer_get_UIOptions( ISpeechRecognizer *iface, ISpeec return E_NOTIMPL; }
-static HRESULT WINAPI compile_callback( IInspectable *invoker, IInspectable **result ) +static HRESULT recognizer_compile_constraints_async( IInspectable *invoker, IInspectable **result ) { return compilation_result_create(SpeechRecognitionResultStatus_Success, (ISpeechRecognitionCompilationResult **) result); } @@ -489,7 +489,7 @@ static HRESULT WINAPI recognizer_CompileConstraintsAsync( ISpeechRecognizer *ifa { IAsyncOperation_IInspectable **value = (IAsyncOperation_IInspectable **)operation; FIXME("iface %p, operation %p semi-stub!\n", iface, operation); - return async_operation_inspectable_create(&IID_IAsyncOperation_SpeechRecognitionCompilationResult, NULL, compile_callback, value); + return async_operation_inspectable_create(&IID_IAsyncOperation_SpeechRecognitionCompilationResult, NULL, recognizer_compile_constraints_async, value); }
static HRESULT WINAPI recognizer_RecognizeAsync( ISpeechRecognizer *iface, diff --git a/dlls/windows.media.speech/synthesizer.c b/dlls/windows.media.speech/synthesizer.c index ce257c7c355..39d14b84ab7 100644 --- a/dlls/windows.media.speech/synthesizer.c +++ b/dlls/windows.media.speech/synthesizer.c @@ -375,7 +375,7 @@ static HRESULT WINAPI synthesizer_GetTrustLevel( ISpeechSynthesizer *iface, Trus return E_NOTIMPL; }
-static HRESULT CALLBACK text_to_stream_operation( IInspectable *invoker, IInspectable **result ) +static HRESULT synthesizer_synthesize_text_to_stream_async( IInspectable *invoker, IInspectable **result ) { return synthesis_stream_create((ISpeechSynthesisStream **)result); } @@ -385,10 +385,10 @@ static HRESULT WINAPI synthesizer_SynthesizeTextToStreamAsync( ISpeechSynthesize { TRACE("iface %p, text %p, operation %p.\n", iface, text, operation); return async_operation_inspectable_create(&IID_IAsyncOperation_SpeechSynthesisStream, NULL, - text_to_stream_operation, (IAsyncOperation_IInspectable **)operation); + synthesizer_synthesize_text_to_stream_async, (IAsyncOperation_IInspectable **)operation); }
-static HRESULT CALLBACK ssml_to_stream_operation( IInspectable *invoker, IInspectable **result ) +static HRESULT synthesizer_synthesize_ssml_to_stream_async( IInspectable *invoker, IInspectable **result ) { return synthesis_stream_create((ISpeechSynthesisStream **)result); } @@ -398,7 +398,7 @@ static HRESULT WINAPI synthesizer_SynthesizeSsmlToStreamAsync( ISpeechSynthesize { TRACE("iface %p, ssml %p, operation %p.\n", iface, ssml, operation); return async_operation_inspectable_create(&IID_IAsyncOperation_SpeechSynthesisStream, NULL, - ssml_to_stream_operation, (IAsyncOperation_IInspectable **)operation); + synthesizer_synthesize_ssml_to_stream_async, (IAsyncOperation_IInspectable **)operation); }
static HRESULT WINAPI synthesizer_put_Voice( ISpeechSynthesizer *iface, IVoiceInformation *value )
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 7 ++++++- dlls/windows.media.speech/tests/speech.c | 24 ++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index aaf9c5f8908..b4f68489bd0 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -266,10 +266,15 @@ static HRESULT WINAPI session_StartWithModeAsync( ISpeechContinuousRecognitionSe return E_NOTIMPL; }
+static HRESULT session_stop_async( IInspectable *invoker ) +{ + return S_OK; +} + static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { FIXME("iface %p, action %p stub!\n", iface, action); - return E_NOTIMPL; + return async_action_create(NULL, session_stop_async, action); }
static HRESULT WINAPI session_CancelAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 445d10923ae..b6355743a83 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1762,9 +1762,7 @@ static void test_Recognition(void) */
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action2); - todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); - - if (FAILED(hr)) goto skip_action; + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr);
async_void_handler_create_static(&action_handler); action_handler.event_block = CreateEventW(NULL, FALSE, FALSE, NULL); @@ -1776,40 +1774,38 @@ static void test_Recognition(void) put_param.handler = &action_handler.IAsyncActionCompletedHandler_iface; put_param.action = action2; put_thread = CreateThread(NULL, 0, action_put_completed_thread, &put_param, 0, NULL); - todo_wine ok(!WaitForSingleObject(action_handler.event_finished , 5000), "Wait for event_finished failed.\n"); + ok(!WaitForSingleObject(action_handler.event_finished , 5000), "Wait for event_finished failed.\n");
handler = (void *)0xdeadbeef; old_ref = action_handler.ref; hr = IAsyncAction_get_Completed(action2, &handler); - todo_wine ok(hr == S_OK, "IAsyncAction_get_Completed failed, hr %#lx.\n", hr); + ok(hr == S_OK, "IAsyncAction_get_Completed failed, hr %#lx.\n", hr);
- todo_wine ok(handler == &action_handler.IAsyncActionCompletedHandler_iface || /* Broken on 1507. */ - broken(handler != NULL && handler != (void *)0xdeadbeef), "Handler was %p.\n", handler); + todo_wine ok(handler == &action_handler.IAsyncActionCompletedHandler_iface, "Handler was %p.\n", handler);
ref = action_handler.ref - old_ref; todo_wine ok(ref == 1, "The ref was increased by %lu.\n", ref); - IAsyncActionCompletedHandler_Release(handler); + if (handler) IAsyncActionCompletedHandler_Release(handler);
hr = IAsyncAction_QueryInterface(action2, &IID_IAsyncInfo, (void **)&info); - todo_wine ok(hr == S_OK, "IAsyncAction_QueryInterface failed, hr %#lx.\n", hr); + ok(hr == S_OK, "IAsyncAction_QueryInterface failed, hr %#lx.\n", hr);
hr = IAsyncInfo_Close(info); /* If IAsyncInfo_Close would wait for the handler to finish, the test would get stuck here. */ - todo_wine ok(hr == S_OK, "IAsyncInfo_Close failed, hr %#lx.\n", hr); + ok(hr == S_OK, "IAsyncInfo_Close failed, hr %#lx.\n", hr); check_async_info((IInspectable *)action2, 3, AsyncStatus_Closed, S_OK);
set = SetEvent(action_handler.event_block); - todo_wine ok(set == TRUE, "Event 'event_block' wasn't set.\n"); - todo_wine ok(!WaitForSingleObject(put_thread , 1000), "Wait for put_thread failed.\n"); + ok(set == TRUE, "Event 'event_block' wasn't set.\n"); + ok(!WaitForSingleObject(put_thread , 1000), "Wait for put_thread failed.\n"); IAsyncInfo_Release(info);
CloseHandle(action_handler.event_finished); CloseHandle(action_handler.event_block); CloseHandle(put_thread);
- todo_wine ok(action != action2, "actions were the same!\n"); + ok(action != action2, "actions were the same!\n");
IAsyncAction_Release(action2); -skip_action: IAsyncAction_Release(action);
hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 7 ++++++- dlls/windows.media.speech/tests/speech.c | 11 ++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index b4f68489bd0..54a0e165f5f 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -283,10 +283,15 @@ static HRESULT WINAPI session_CancelAsync( ISpeechContinuousRecognitionSession * return E_NOTIMPL; }
+static HRESULT session_pause_async( IInspectable *invoker ) +{ + return S_OK; +} + static HRESULT WINAPI session_PauseAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { FIXME("iface %p, action %p stub!\n", iface, action); - return E_NOTIMPL; + return async_action_create(NULL, session_pause_async, action); }
static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface ) diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index b6355743a83..8b14221f487 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1761,6 +1761,15 @@ static void test_Recognition(void) * TODO: Use a loopback device together with prerecorded audio files to test the recognizer's functionality. */
+ hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action2); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action2, &action_handler); + check_async_info((IInspectable *)action2, 3, Completed, S_OK); + IAsyncAction_Release(action2); + + hr = ISpeechContinuousRecognitionSession_Resume(session); + todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr); + hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action2); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr);
@@ -1792,7 +1801,7 @@ static void test_Recognition(void)
hr = IAsyncInfo_Close(info); /* If IAsyncInfo_Close would wait for the handler to finish, the test would get stuck here. */ ok(hr == S_OK, "IAsyncInfo_Close failed, hr %#lx.\n", hr); - check_async_info((IInspectable *)action2, 3, AsyncStatus_Closed, S_OK); + check_async_info((IInspectable *)action2, 4, AsyncStatus_Closed, S_OK);
set = SetEvent(action_handler.event_block); ok(set == TRUE, "Event 'event_block' wasn't set.\n");
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/tests/speech.c | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+)
diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 8b14221f487..155382fa76d 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1619,6 +1619,7 @@ static void test_Recognition(void) struct iterator_hstring iterator_hstring; struct iterable_hstring iterable_hstring; EventRegistrationToken token = { .value = 0 }; + SpeechRecognizerState recog_state; HSTRING commands[3], hstr, tag; HANDLE put_thread; LONG ref, old_ref; @@ -1717,6 +1718,11 @@ static void test_Recognition(void) ok(hr == S_OK, "ISpeechContinuousRecognitionSession_add_ResultGenerated failed, hr %#lx.\n", hr); ok(token.value != 0xdeadbeef, "Got unexpexted token: %#I64x.\n", token.value);
+ recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + hr = ISpeechRecognizer_CompileConstraintsAsync(recognizer, &operation); ok(hr == S_OK, "ISpeechRecognizer_CompileConstraintsAsync failed, hr %#lx.\n", hr); await_async_inspectable((IAsyncOperation_IInspectable *)operation, @@ -1757,6 +1763,11 @@ static void test_Recognition(void)
IAsyncInfo_Release(info);
+ recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + /* * TODO: Use a loopback device together with prerecorded audio files to test the recognizer's functionality. */ @@ -1767,9 +1778,20 @@ static void test_Recognition(void) check_async_info((IInspectable *)action2, 3, Completed, S_OK); IAsyncAction_Release(action2);
+ recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ + broken(recog_state == SpeechRecognizerState_Capturing) , "recog_state was %u.\n", recog_state); + hr = ISpeechContinuousRecognitionSession_Resume(session); todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr);
+ recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action2); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr);
@@ -1817,6 +1839,11 @@ static void test_Recognition(void) IAsyncAction_Release(action2); IAsyncAction_Release(action);
+ recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_remove_ResultGenerated failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/tests/speech.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 155382fa76d..a8ed8cff1e7 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -18,6 +18,7 @@ #define COBJMACROS #include <stdarg.h>
+#include "corerror.h" #include "windef.h" #include "winbase.h" #include "winerror.h" @@ -1741,6 +1742,11 @@ static void test_Recognition(void)
await_async_void(action, &action_handler);
+ action2 = (void *)0xdeadbeef; + hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action2); + todo_wine ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); + todo_wine ok(action2 == NULL, "action2 was %p.\n", action2); + hr = IAsyncAction_QueryInterface(action, &IID_IAsyncInfo, (void **)&info); ok(hr == S_OK, "IAsyncAction_QueryInterface failed, hr %#lx.\n", hr); check_async_info((IInspectable *)action, 1, Completed, S_OK); @@ -1784,6 +1790,17 @@ static void test_Recognition(void) todo_wine ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ broken(recog_state == SpeechRecognizerState_Capturing) , "recog_state was %u.\n", recog_state);
+ /* Check what happens if we try to pause again, when the session is already paused. */ + hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action2); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action2, &action_handler); + check_async_info((IInspectable *)action2, 4, Completed, S_OK); + IAsyncAction_Release(action2); + + hr = ISpeechContinuousRecognitionSession_Resume(session); + todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr); + + /* Resume when already resumed. */ hr = ISpeechContinuousRecognitionSession_Resume(session); todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr);
@@ -1823,7 +1840,7 @@ static void test_Recognition(void)
hr = IAsyncInfo_Close(info); /* If IAsyncInfo_Close would wait for the handler to finish, the test would get stuck here. */ ok(hr == S_OK, "IAsyncInfo_Close failed, hr %#lx.\n", hr); - check_async_info((IInspectable *)action2, 4, AsyncStatus_Closed, S_OK); + check_async_info((IInspectable *)action2, 5, AsyncStatus_Closed, S_OK);
set = SetEvent(action_handler.event_block); ok(set == TRUE, "Event 'event_block' wasn't set.\n"); @@ -1844,6 +1861,11 @@ static void test_Recognition(void) todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
+ /* Try stopping, when already stopped. */ + hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); + todo_wine ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); + todo_wine ok(action == NULL, "action was %p.\n", action); + hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_remove_ResultGenerated failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/private.h | 1 + dlls/windows.media.speech/recognizer.c | 121 ++++++++++++++++++++++- dlls/windows.media.speech/tests/speech.c | 8 +- 3 files changed, 122 insertions(+), 8 deletions(-)
diff --git a/dlls/windows.media.speech/private.h b/dlls/windows.media.speech/private.h index 41f7b02e3de..e80d73ec1fb 100644 --- a/dlls/windows.media.speech/private.h +++ b/dlls/windows.media.speech/private.h @@ -23,6 +23,7 @@ #include <stdarg.h>
#define COBJMACROS +#include "corerror.h" #include "windef.h" #include "winbase.h" #include "winstring.h" diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 54a0e165f5f..ae40ec48d8b 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -160,6 +160,10 @@ struct session
struct list completed_handlers; struct list result_handlers; + + HANDLE worker_thread, worker_control_event; + BOOLEAN worker_running; + CRITICAL_SECTION cs; };
/* @@ -173,6 +177,29 @@ static inline struct session *impl_from_ISpeechContinuousRecognitionSession( ISp return CONTAINING_RECORD(iface, struct session, ISpeechContinuousRecognitionSession_iface); }
+static DWORD CALLBACK session_worker_thread_cb( void *args ) +{ + ISpeechContinuousRecognitionSession *iface = args; + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + BOOLEAN running = TRUE; + DWORD status; + + while (running) + { + status = WaitForMultipleObjects(1, &impl->worker_control_event, FALSE, INFINITE); + if (status == 0) /* worker_control_event signaled */ + { + EnterCriticalSection(&impl->cs); + running = impl->worker_running; + LeaveCriticalSection(&impl->cs); + } + + /* TODO: Send mic data to recognizer and handle results. */ + } + + return 0; +} + static HRESULT WINAPI session_QueryInterface( ISpeechContinuousRecognitionSession *iface, REFIID iid, void **out ) { struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); @@ -200,6 +227,12 @@ static ULONG WINAPI session_AddRef( ISpeechContinuousRecognitionSession *iface ) return ref; }
+static void session_join_worker_thread( HANDLE thread ) +{ + WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); +} + static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface ) { struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); @@ -208,8 +241,23 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface
if (!ref) { + HANDLE thread; + + EnterCriticalSection(&impl->cs); + thread = impl->worker_thread; + impl->worker_running = FALSE; + impl->worker_thread = INVALID_HANDLE_VALUE; + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + session_join_worker_thread(thread); + typed_event_handlers_clear(&impl->completed_handlers); typed_event_handlers_clear(&impl->result_handlers); + + impl->cs.DebugInfo->Spare[0] = 0; + DeleteCriticalSection(&impl->cs); + IVector_ISpeechRecognitionConstraint_Release(impl->constraints); free(impl); } @@ -254,8 +302,32 @@ static HRESULT session_start_async( IInspectable *invoker )
static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_start_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + BOOLEAN invalid_state = FALSE; + HRESULT hr = S_OK; + + TRACE("iface %p, action %p.\n", iface, action); + + *action = NULL; + + EnterCriticalSection(&impl->cs); + if (!impl->worker_running && !impl->worker_thread && SUCCEEDED(hr = async_action_create(NULL, session_start_async, action))) + { + impl->worker_running = TRUE; + if (!(impl->worker_thread = CreateThread(NULL, 0, session_worker_thread_cb, impl, 0, NULL))) + { + hr = HRESULT_FROM_WIN32(GetLastError()); + impl->worker_running = FALSE; + } + } + else + invalid_state = TRUE; + LeaveCriticalSection(&impl->cs); + + if (invalid_state && SUCCEEDED(hr)) + hr = COR_E_INVALIDOPERATION; + + return hr; }
static HRESULT WINAPI session_StartWithModeAsync( ISpeechContinuousRecognitionSession *iface, @@ -273,8 +345,39 @@ static HRESULT session_stop_async( IInspectable *invoker )
static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_stop_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + BOOLEAN invalid_state = FALSE; + HRESULT hr = S_OK; + HANDLE thread; + + TRACE("iface %p, action %p.\n", iface, action); + + *action = NULL; + + EnterCriticalSection(&impl->cs); + if (impl->worker_running && impl->worker_thread && SUCCEEDED(hr = async_action_create(NULL, session_stop_async, action))) + { + thread = impl->worker_thread; + impl->worker_thread = INVALID_HANDLE_VALUE; + impl->worker_running = FALSE; + } + else + invalid_state = TRUE; + LeaveCriticalSection(&impl->cs); + + if (invalid_state && SUCCEEDED(hr)) + hr = COR_E_INVALIDOPERATION; + else if (!invalid_state) + { + SetEvent(impl->worker_control_event); + session_join_worker_thread(thread); + + EnterCriticalSection(&impl->cs); + impl->worker_thread = NULL; + LeaveCriticalSection(&impl->cs); + } + + return hr; }
static HRESULT WINAPI session_CancelAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) @@ -818,9 +921,18 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface list_init(&session->completed_handlers); list_init(&session->result_handlers);
+ if (!(session->worker_control_event = CreateEventW(NULL, FALSE, FALSE, NULL))) + { + hr = HRESULT_FROM_WIN32(GetLastError()); + goto error; + } + if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&session->constraints))) goto error;
+ InitializeCriticalSection(&session->cs); + session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs"); + /* Init ISpeechRecognizer */ impl->ISpeechRecognizer_iface.lpVtbl = &speech_recognizer_vtbl; impl->IClosable_iface.lpVtbl = &closable_vtbl; @@ -835,6 +947,7 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface
error: if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints); + CloseHandle(session->worker_control_event); free(session); free(impl);
diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index a8ed8cff1e7..8b31031d3c5 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1744,8 +1744,8 @@ static void test_Recognition(void)
action2 = (void *)0xdeadbeef; hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action2); - todo_wine ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); - todo_wine ok(action2 == NULL, "action2 was %p.\n", action2); + ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); + ok(action2 == NULL, "action2 was %p.\n", action2);
hr = IAsyncAction_QueryInterface(action, &IID_IAsyncInfo, (void **)&info); ok(hr == S_OK, "IAsyncAction_QueryInterface failed, hr %#lx.\n", hr); @@ -1863,8 +1863,8 @@ static void test_Recognition(void)
/* Try stopping, when already stopped. */ hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); - todo_wine ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); - todo_wine ok(action == NULL, "action was %p.\n", action); + ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); + ok(action == NULL, "action was %p.\n", action);
hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_remove_ResultGenerated failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/tests/speech.c | 42 ++++++++++++++++++++++++ 1 file changed, 42 insertions(+)
diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 8b31031d3c5..8342f3cf0f3 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1866,6 +1866,48 @@ static void test_Recognition(void) ok(hr == COR_E_INVALIDOPERATION, "ISpeechContinuousRecognitionSession_StopAsync failed, hr %#lx.\n", hr); ok(action == NULL, "action was %p.\n", action);
+ /* Test, if Start/StopAsync resets the pause state. */ + hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_StartAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + hr = ISpeechContinuousRecognitionSession_PauseAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ + broken(recog_state == SpeechRecognizerState_Capturing) , "recog_state was %u.\n", recog_state); + + hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + + hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + + recog_state = 0xdeadbeef; + hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); + todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + + hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); + await_async_void(action, &action_handler); + IAsyncAction_Release(action); + hr = ISpeechContinuousRecognitionSession_remove_ResultGenerated(session, token); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_remove_ResultGenerated failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 33 ++++++++++++++++++++---- dlls/windows.media.speech/tests/speech.c | 4 +-- 2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index ae40ec48d8b..6decf766714 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -162,7 +162,7 @@ struct session struct list result_handlers;
HANDLE worker_thread, worker_control_event; - BOOLEAN worker_running; + BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; };
@@ -360,6 +360,7 @@ static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *if thread = impl->worker_thread; impl->worker_thread = INVALID_HANDLE_VALUE; impl->worker_running = FALSE; + impl->worker_paused = FALSE; } else invalid_state = TRUE; @@ -393,14 +394,36 @@ static HRESULT session_pause_async( IInspectable *invoker )
static HRESULT WINAPI session_PauseAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) { - FIXME("iface %p, action %p stub!\n", iface, action); - return async_action_create(NULL, session_pause_async, action); + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + HRESULT hr = S_OK; + + TRACE("iface %p, action %p.\n", iface, action); + + if (FAILED(hr = async_action_create(NULL, session_pause_async, action))) + return hr; + + EnterCriticalSection(&impl->cs); + if (impl->worker_running) impl->worker_paused = TRUE; + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + + return hr; }
static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface ) { - FIXME("iface %p stub!\n", iface); - return E_NOTIMPL; + struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); + + TRACE("iface %p.\n", iface); + + EnterCriticalSection(&impl->cs); + if (impl->worker_running) impl->worker_paused = FALSE; + LeaveCriticalSection(&impl->cs); + + SetEvent(impl->worker_control_event); + + return S_OK; }
static HRESULT WINAPI session_add_Completed( ISpeechContinuousRecognitionSession *iface, diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index 8342f3cf0f3..efd4b6fa28d 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1798,11 +1798,11 @@ static void test_Recognition(void) IAsyncAction_Release(action2);
hr = ISpeechContinuousRecognitionSession_Resume(session); - todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr);
/* Resume when already resumed. */ hr = ISpeechContinuousRecognitionSession_Resume(session); - todo_wine ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr); + ok(hr == S_OK, "ISpeechContinuousRecognitionSession_Resume failed, hr %#lx.\n", hr);
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 122 +++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 6 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 6decf766714..8a82dadb45d 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -19,6 +19,10 @@
#include "private.h"
+#include "initguid.h" +#include "audioclient.h" +#include "mmdeviceapi.h" + #include "wine/debug.h"
WINE_DEFAULT_DEBUG_CHANNEL(speech); @@ -161,7 +165,10 @@ struct session struct list completed_handlers; struct list result_handlers;
- HANDLE worker_thread, worker_control_event; + IAudioClient *audio_client; + IAudioCaptureClient *capture_client; + + HANDLE worker_thread, worker_control_event, audio_buf_event; BOOLEAN worker_running, worker_paused; CRITICAL_SECTION cs; }; @@ -181,22 +188,59 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) { ISpeechContinuousRecognitionSession *iface = args; struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); - BOOLEAN running = TRUE; - DWORD status; + UINT32 frame_count, frames_available; + BOOLEAN running = TRUE, paused = FALSE; + DWORD flags, status; + HANDLE events[2]; + BYTE *audio_buf; + + IAudioClient_Start(impl->audio_client); + IAudioClient_GetBufferSize(impl->audio_client, &frame_count);
while (running) { - status = WaitForMultipleObjects(1, &impl->worker_control_event, FALSE, INFINITE); + BOOLEAN old_paused = paused; + UINT32 count = 0; + + events[count++] = impl->worker_control_event; + if (!paused) events[count++] = impl->audio_buf_event; + + status = WaitForMultipleObjects(count, events, FALSE, INFINITE); if (status == 0) /* worker_control_event signaled */ { EnterCriticalSection(&impl->cs); + paused = impl->worker_paused; running = impl->worker_running; LeaveCriticalSection(&impl->cs); - }
- /* TODO: Send mic data to recognizer and handle results. */ + if (old_paused < paused) + { + IAudioClient_Stop(impl->audio_client); + TRACE("session worker paused.\n"); + } + else if (old_paused > paused) + { + IAudioClient_Start(impl->audio_client); + TRACE("session worker resumed.\n"); + } + } + else if (status == 1) /* audio_buf_event signaled */ + { + while (IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK) + { + /* TODO: Send mic data to recognizer and handle results. */ + IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available); + } + } + else + { + ERR("Unexpected state entered. Aborting worker!\n"); + break; + } }
+ IAudioClient_Stop(impl->audio_client); + return 0; }
@@ -255,6 +299,9 @@ static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface typed_event_handlers_clear(&impl->completed_handlers); typed_event_handlers_clear(&impl->result_handlers);
+ IAudioCaptureClient_Release(impl->capture_client); + IAudioClient_Release(impl->audio_client); + impl->cs.DebugInfo->Spare[0] = 0; DeleteCriticalSection(&impl->cs);
@@ -910,6 +957,64 @@ static const struct IActivationFactoryVtbl activation_factory_vtbl =
DEFINE_IINSPECTABLE(recognizer_factory, ISpeechRecognizerFactory, struct recognizer_statics, IActivationFactory_iface)
+static HRESULT recognizer_factory_create_audio_capture(struct session *session) +{ + const REFERENCE_TIME buffer_duration = 5000000; /* 0.5 second */ + IMMDeviceEnumerator *mm_enum = NULL; + IMMDevice *mm_device = NULL; + WAVEFORMATEX *wfx = NULL; + WCHAR *str = NULL; + HRESULT hr = S_OK; + + if (!(session->audio_buf_event = CreateEventW(NULL, FALSE, FALSE, NULL))) + return HRESULT_FROM_WIN32(GetLastError()); + + if (FAILED(hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_INPROC_SERVER, &IID_IMMDeviceEnumerator, (void**)&mm_enum))) + goto cleanup; + + if (FAILED(hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint(mm_enum, eCapture, eMultimedia, &mm_device))) + goto cleanup; + + if (FAILED(hr = IMMDevice_Activate(mm_device, &IID_IAudioClient, CLSCTX_INPROC_SERVER, NULL, (void**)&session->audio_client))) + goto cleanup; + + if (SUCCEEDED(hr = IMMDevice_GetId(mm_device, &str))) + { + TRACE("selected capture device ID: %s\n", debugstr_w(str)); + CoTaskMemFree(str); + } + + if (FAILED(hr = IAudioClient_GetMixFormat(session->audio_client, (WAVEFORMATEX **)&wfx))) + goto cleanup; + + wfx->wFormatTag = WAVE_FORMAT_PCM; + wfx->nChannels = 1; + wfx->nSamplesPerSec = 16000; + wfx->nBlockAlign = 2; + wfx->wBitsPerSample = 16; + wfx->nAvgBytesPerSec = wfx->nSamplesPerSec * wfx->nBlockAlign; + TRACE("wfx tag %u, channels %u, samples %lu, bits %u, align %u.\n", wfx->wFormatTag, wfx->nChannels, wfx->nSamplesPerSec, wfx->wBitsPerSample, wfx->nBlockAlign); + + if (FAILED(hr = IAudioClient_Initialize(session->audio_client, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, buffer_duration, 0, wfx, NULL))) + goto cleanup; + + if (FAILED(hr = IAudioClient_SetEventHandle(session->audio_client, session->audio_buf_event))) + goto cleanup; + + hr = IAudioClient_GetService(session->audio_client, &IID_IAudioCaptureClient, (void**)&session->capture_client); + +cleanup: + if (FAILED(hr)) + { + if (session->audio_client) IAudioClient_Release(session->audio_client); + if (session->audio_buf_event) CloseHandle(session->audio_buf_event); + } + if (wfx) CoTaskMemFree(wfx); + if (mm_device) IMMDevice_Release(mm_device); + if (mm_enum) IMMDeviceEnumerator_Release(mm_enum); + return hr; +} + static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface, ILanguage *language, ISpeechRecognizer **speechrecognizer ) { struct recognizer *impl; @@ -953,6 +1058,9 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&session->constraints))) goto error;
+ if (FAILED(hr = recognizer_factory_create_audio_capture(session))) + goto error; + InitializeCriticalSection(&session->cs); session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs");
@@ -969,6 +1077,8 @@ static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface return S_OK;
error: + if (session->capture_client) IAudioCaptureClient_Release(session->capture_client); + if (session->audio_client) IAudioClient_Release(session->audio_client); if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints); CloseHandle(session->worker_control_event); free(session);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 31 +++++++++++++++++++++--- dlls/windows.media.speech/tests/speech.c | 30 +++++++++++------------ 2 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 8a82dadb45d..9820da4d26f 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -162,6 +162,8 @@ struct session
IVector_ISpeechRecognitionConstraint *constraints;
+ SpeechRecognizerState recognizer_state; + struct list completed_handlers; struct list result_handlers;
@@ -366,6 +368,7 @@ static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *i hr = HRESULT_FROM_WIN32(GetLastError()); impl->worker_running = FALSE; } + impl->recognizer_state = SpeechRecognizerState_Capturing; } else invalid_state = TRUE; @@ -408,6 +411,7 @@ static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *if impl->worker_thread = INVALID_HANDLE_VALUE; impl->worker_running = FALSE; impl->worker_paused = FALSE; + impl->recognizer_state = SpeechRecognizerState_Idle; } else invalid_state = TRUE; @@ -450,7 +454,11 @@ static HRESULT WINAPI session_PauseAsync( ISpeechContinuousRecognitionSession *i return hr;
EnterCriticalSection(&impl->cs); - if (impl->worker_running) impl->worker_paused = TRUE; + if (impl->worker_running) + { + impl->worker_paused = TRUE; + impl->recognizer_state = SpeechRecognizerState_Paused; + } LeaveCriticalSection(&impl->cs);
SetEvent(impl->worker_control_event); @@ -465,7 +473,11 @@ static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface TRACE("iface %p.\n", iface);
EnterCriticalSection(&impl->cs); - if (impl->worker_running) impl->worker_paused = FALSE; + if (impl->worker_running) + { + impl->worker_paused = FALSE; + impl->recognizer_state = SpeechRecognizerState_Idle; + } LeaveCriticalSection(&impl->cs);
SetEvent(impl->worker_control_event); @@ -788,8 +800,19 @@ static HRESULT WINAPI recognizer2_get_ContinuousRecognitionSession( ISpeechRecog
static HRESULT WINAPI recognizer2_get_State( ISpeechRecognizer2 *iface, SpeechRecognizerState *state ) { - FIXME("iface %p, state %p stub!\n", iface, state); - return E_NOTIMPL; + struct recognizer *impl = impl_from_ISpeechRecognizer2(iface); + struct session *session = impl_from_ISpeechContinuousRecognitionSession(impl->session); + + FIXME("iface %p, state %p not all states are supported, yet!\n", iface, state); + + if (!state) + return E_POINTER; + + EnterCriticalSection(&session->cs); + *state = session->recognizer_state; + LeaveCriticalSection(&session->cs); + + return S_OK; }
static HRESULT WINAPI recognizer2_StopRecognitionAsync( ISpeechRecognizer2 *iface, IAsyncAction **action ) diff --git a/dlls/windows.media.speech/tests/speech.c b/dlls/windows.media.speech/tests/speech.c index efd4b6fa28d..5c0f963fa77 100644 --- a/dlls/windows.media.speech/tests/speech.c +++ b/dlls/windows.media.speech/tests/speech.c @@ -1721,8 +1721,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
hr = ISpeechRecognizer_CompileConstraintsAsync(recognizer, &operation); ok(hr == S_OK, "ISpeechRecognizer_CompileConstraintsAsync failed, hr %#lx.\n", hr); @@ -1771,8 +1771,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state);
/* * TODO: Use a loopback device together with prerecorded audio files to test the recognizer's functionality. @@ -1786,8 +1786,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ broken(recog_state == SpeechRecognizerState_Capturing) , "recog_state was %u.\n", recog_state);
/* Check what happens if we try to pause again, when the session is already paused. */ @@ -1806,7 +1806,7 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action2); @@ -1858,8 +1858,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
/* Try stopping, when already stopped. */ hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); @@ -1879,8 +1879,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Paused || /* Broken on Win10 1507 */ broken(recog_state == SpeechRecognizerState_Capturing) , "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); @@ -1890,8 +1890,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Idle, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StartAsync(session, &action); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr); @@ -1900,8 +1900,8 @@ static void test_Recognition(void)
recog_state = 0xdeadbeef; hr = ISpeechRecognizer2_get_State(recognizer2, &recog_state); - todo_wine ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); - todo_wine ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state); + ok(hr == S_OK, "ISpeechRecognizer2_get_State failed, hr %#lx.\n", hr); + ok(recog_state == SpeechRecognizerState_Capturing, "recog_state was %u.\n", recog_state);
hr = ISpeechContinuousRecognitionSession_StopAsync(session, &action); ok(hr == S_OK, "ISpeechContinuousRecognitionSession_PauseAsync failed, hr %#lx.\n", hr);
From: Bernhard Kölbl besentv@gmail.com
Signed-off-by: Bernhard Kölbl besentv@gmail.com --- dlls/windows.media.speech/recognizer.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/dlls/windows.media.speech/recognizer.c b/dlls/windows.media.speech/recognizer.c index 9820da4d26f..f3ab4422495 100644 --- a/dlls/windows.media.speech/recognizer.c +++ b/dlls/windows.media.speech/recognizer.c @@ -190,15 +190,17 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) { ISpeechContinuousRecognitionSession *iface = args; struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface); - UINT32 frame_count, frames_available; + UINT32 frame_count, frames_available, tmp_buf_offset = 0; BOOLEAN running = TRUE, paused = FALSE; + BYTE *audio_buf, *tmp_buf; DWORD flags, status; HANDLE events[2]; - BYTE *audio_buf;
IAudioClient_Start(impl->audio_client); IAudioClient_GetBufferSize(impl->audio_client, &frame_count);
+ tmp_buf = malloc(sizeof(*tmp_buf) * frame_count * 2); /* multiplied with 2 because our audio frames have 16bit depth. */ + while (running) { BOOLEAN old_paused = paused; @@ -228,11 +230,17 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) } else if (status == 1) /* audio_buf_event signaled */ { - while (IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK) + while (IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK && tmp_buf_offset < (frame_count * 2)) { - /* TODO: Send mic data to recognizer and handle results. */ + memcpy(tmp_buf + tmp_buf_offset, audio_buf, frames_available * 2); + tmp_buf_offset += (frames_available * 2); /* multiplied with 2 because our audio frames have 16bit depth. */ + IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available); } + + /* TODO: Send mic data to recognizer and handle results. */ + + tmp_buf_offset = 0; } else { @@ -242,6 +250,7 @@ static DWORD CALLBACK session_worker_thread_cb( void *args ) }
IAudioClient_Stop(impl->audio_client); + free(tmp_buf);
return 0; }
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
- if (FAILED(hr = IMMDevice_Activate(mm_device, &IID_IAudioClient, CLSCTX_INPROC_SERVER, NULL, (void**)&session->audio_client)))
goto cleanup;
- if (SUCCEEDED(hr = IMMDevice_GetId(mm_device, &str)))
- {
TRACE("selected capture device ID: %s\n", debugstr_w(str));
CoTaskMemFree(str);
- }
- if (FAILED(hr = IAudioClient_GetMixFormat(session->audio_client, (WAVEFORMATEX **)&wfx)))
goto cleanup;
- wfx->wFormatTag = WAVE_FORMAT_PCM;
- wfx->nChannels = 1;
- wfx->nSamplesPerSec = 16000;
Magic constant. You should replace this with a `#define` shared with Unix side interfacing vosk. `#define WINE_VOSK_SAMPLE_RATE 16000` will do.
(I'm aware that most vosk models are trained with 16kHz PCM streams.)
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
goto cleanup;
- if (SUCCEEDED(hr = IMMDevice_GetId(mm_device, &str)))
- {
TRACE("selected capture device ID: %s\n", debugstr_w(str));
CoTaskMemFree(str);
- }
- if (FAILED(hr = IAudioClient_GetMixFormat(session->audio_client, (WAVEFORMATEX **)&wfx)))
goto cleanup;
- wfx->wFormatTag = WAVE_FORMAT_PCM;
- wfx->nChannels = 1;
- wfx->nSamplesPerSec = 16000;
- wfx->nBlockAlign = 2;
- wfx->wBitsPerSample = 16;
Ditto. Something like (with appropriate defines):
```suggestion:-1+0 wfx->wBitsPerSample = WINE_VOSK_BITS_PER_SAMPLE; wfx->nBlockAlign = (wfx->wBitsPerSample + 7) / 8 * wfx->nChannels; ```
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
if (!ref) {
HANDLE thread;
EnterCriticalSection(&impl->cs);
thread = impl->worker_thread;
impl->worker_running = FALSE;
impl->worker_thread = INVALID_HANDLE_VALUE;
LeaveCriticalSection(&impl->cs);
SetEvent(impl->worker_control_event);
session_join_worker_thread(thread);
```suggestion:-0+0 WaitForSingleObject(thread, INFINITE); CloseHandle(thread); ```
Readers cannot infer that `session_join_worker_thread` closes the given handle from the function's name. Instead of renaming it to `session_join_close_worker_thread`, might as well open code it.
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
thread = impl->worker_thread;
impl->worker_thread = INVALID_HANDLE_VALUE;
impl->worker_running = FALSE;
impl->worker_paused = FALSE;
impl->recognizer_state = SpeechRecognizerState_Idle;
- }
- else
invalid_state = TRUE;
- LeaveCriticalSection(&impl->cs);
- if (invalid_state && SUCCEEDED(hr))
hr = COR_E_INVALIDOPERATION;
- else if (!invalid_state)
- {
SetEvent(impl->worker_control_event);
session_join_worker_thread(thread);
```suggestion:-0+0 WaitForSingleObject(thread, INFINITE); CloseHandle(thread); ```
Readers cannot infer that `session_join_worker_thread` closes the given handle from the function's name. Instead of renaming it to `session_join_close_worker_thread`, might as well open code it.
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
- return hr;
}
static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface ) {
- FIXME("iface %p stub!\n", iface);
- return E_NOTIMPL;
- struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface);
- TRACE("iface %p.\n", iface);
- EnterCriticalSection(&impl->cs);
- if (impl->worker_running)
- {
impl->worker_paused = FALSE;
impl->recognizer_state = SpeechRecognizerState_Idle;
From https://learn.microsoft.com/en-us/uwp/api/windows.media.speechrecognition.sp...:
**Idle**: 0
Indicates that speech recognition is not active and the speech recognizer is not capturing (listening for) audio input.
In this state, [SpeechRecognizer.RecognizeAsync], [SpeechRecognizer.RecognizeWithUIAsync], **[SpeechContinuousRecognitionSession.StartAsync]**, or [SpeechRecognizer.CompileConstraintsAsync] can be called.
(emphasis mine)
A successful call to `StartAsync` requires that `worker_running` is true as a precondition. However, the `get_State` implementation returns `recognizer_state` as-is. Therefore, `worker_running && recognizer_state == SpeechRecognizerState_Idle` is an invalid state combination.
Something like the following would make more sense:
```suggestion:-0+0 impl->recognizer_state = SpeechRecognizerState_Capturing; ```
[SpeechRecognizer.RecognizeAsync]: https://learn.microsoft.com/en-us/uwp/api/windows.media.speechrecognition.sp... [SpeechRecognizer.RecognizeWithUIAsync]: https://learn.microsoft.com/en-us/uwp/api/windows.media.speechrecognition.sp... [SpeechContinuousRecognitionSession.StartAsync]: https://learn.microsoft.com/en-us/uwp/api/windows.media.speechrecognition.sp...) [SpeechRecognizer.CompileConstraintsAsync]: https://learn.microsoft.com/en-us/uwp/api/windows.media.speechrecognition.sp...
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
- HRESULT hr = S_OK;
- TRACE("iface %p, action %p.\n", iface, action);
- *action = NULL;
- EnterCriticalSection(&impl->cs);
- if (!impl->worker_running && !impl->worker_thread && SUCCEEDED(hr = async_action_create(NULL, session_start_async, action)))
- {
impl->worker_running = TRUE;
if (!(impl->worker_thread = CreateThread(NULL, 0, session_worker_thread_cb, impl, 0, NULL)))
{
hr = HRESULT_FROM_WIN32(GetLastError());
impl->worker_running = FALSE;
}
impl->recognizer_state = SpeechRecognizerState_Capturing;
This changes the `recognizer_state` regardless of whether the thread creation succeeds or not.
```suggestion:-6+0 if ((impl->worker_thread = CreateThread(NULL, 0, session_worker_thread_cb, impl, 0, NULL))) { impl->worker_running = TRUE; impl->recognizer_state = SpeechRecognizerState_Capturing; } else { hr = HRESULT_FROM_WIN32(GetLastError()); } ```
However, in general this function should be improved in a direction that reduces nested `if`s. You can also replace `invalid_state = TRUE` with `hr = COR_E_INVALIDOPERATION`. Nested `if`s can be replaced by successive `if (SUCCEEDED(hr) [&& ...] )` statements.
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
}
static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action ) {
- FIXME("iface %p, action %p stub!\n", iface, action);
- return async_action_create(NULL, start_callback, action);
- struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface);
- BOOLEAN invalid_state = FALSE;
- HRESULT hr = S_OK;
- TRACE("iface %p, action %p.\n", iface, action);
- *action = NULL;
- EnterCriticalSection(&impl->cs);
- if (!impl->worker_running && !impl->worker_thread && SUCCEEDED(hr = async_action_create(NULL, session_start_async, action)))
`async_action_create` should be taken out of the critical section. In general, minimize the number of potentially failing or blocking operations inside a critical section (ideally to zero).
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
impl->worker_running = TRUE;
if (!(impl->worker_thread = CreateThread(NULL, 0, session_worker_thread_cb, impl, 0, NULL)))
{
hr = HRESULT_FROM_WIN32(GetLastError());
impl->worker_running = FALSE;
}
impl->recognizer_state = SpeechRecognizerState_Capturing;
- }
- else
invalid_state = TRUE;
- LeaveCriticalSection(&impl->cs);
- if (invalid_state && SUCCEEDED(hr))
hr = COR_E_INVALIDOPERATION;
- return hr;
You should release `*action` and set it to `NULL` on failure.
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
TRACE("session worker resumed.\n");
}
}
else if (status == 1) /* audio_buf_event signaled */
{
while (IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK && tmp_buf_offset < (frame_count * 2))
{
memcpy(tmp_buf + tmp_buf_offset, audio_buf, frames_available * 2);
tmp_buf_offset += (frames_available * 2); /* multiplied with 2 because our audio frames have 16bit depth. */
IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available);
}
/* TODO: Send mic data to recognizer and handle results. */
tmp_buf_offset = 0;
Why not move this before the `while` loop, to make clear that `tmp_buf_offset` is `0` at the start of the loop? Alternatively, you can declare `tmp_buf_offset` inside the `else if`.
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
UINT32 count = 0;
events[count++] = impl->worker_control_event;
if (!paused) events[count++] = impl->audio_buf_event;
status = WaitForMultipleObjects(count, events, FALSE, INFINITE);
if (status == 0) /* worker_control_event signaled */
{
EnterCriticalSection(&impl->cs);
paused = impl->worker_paused;
running = impl->worker_running;
LeaveCriticalSection(&impl->cs);
if (old_paused < paused)
{
IAudioClient_Stop(impl->audio_client);
Note that `IAudioClient` may already have pending audio data in buffer at the time `IAudioClient_Stop` is called. Ideally `PauseAsync` should wait until the capture buffer is flushed, but let's skip that for this MR.
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
}
+static DWORD CALLBACK session_worker_thread_cb( void *args ) +{
- ISpeechContinuousRecognitionSession *iface = args;
- struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface);
- UINT32 frame_count, frames_available, tmp_buf_offset = 0;
- BOOLEAN running = TRUE, paused = FALSE;
- BYTE *audio_buf, *tmp_buf;
- DWORD flags, status;
- HANDLE events[2];
- IAudioClient_Start(impl->audio_client);
- IAudioClient_GetBufferSize(impl->audio_client, &frame_count);
- tmp_buf = malloc(sizeof(*tmp_buf) * frame_count * 2); /* multiplied with 2 because our audio frames have 16bit depth. */
As per the review comment below, replace `2` with something like `(WINE_VOSK_BITS_PER_SAMPLE + 7) / 8`.
Hi Bernhard, could you take a look at my review? You can take your time, especially since the code freeze has not yet ended.
(Speaking of the code freeze, the previous MR should have probably stayed Draft.)
Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
- if (FAILED(hr = IAudioClient_Initialize(session->audio_client, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, buffer_duration, 0, wfx, NULL)))
goto cleanup;
- if (FAILED(hr = IAudioClient_SetEventHandle(session->audio_client, session->audio_buf_event)))
goto cleanup;
- hr = IAudioClient_GetService(session->audio_client, &IID_IAudioCaptureClient, (void**)&session->capture_client);
+cleanup:
- if (FAILED(hr))
- {
if (session->audio_client) IAudioClient_Release(session->audio_client);
if (session->audio_buf_event) CloseHandle(session->audio_buf_event);
- }
- if (wfx) CoTaskMemFree(wfx);
```suggestion:-0+0 CoTaskMemFree(wfx); ```
From https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi...:
`[in, optional] pv`
A pointer to the memory block to be freed. If this parameter is **NULL**, the function has no effect.
On Mon Jan 16 16:20:40 2023 +0000, Jinoh Kang wrote:
Magic constant. You should replace this with a `#define` shared with Unix side interfacing vosk. `#define WINE_VOSK_SAMPLE_RATE 16000` will do. (I'm aware that most vosk models are trained with 16kHz PCM streams.)
Either that or I'll keep a wfx instance inside the recognizer object.
On Mon Jan 16 16:20:40 2023 +0000, Jinoh Kang wrote:
WaitForSingleObject(thread, INFINITE); CloseHandle(thread);
Readers cannot infer that `session_join_worker_thread` closes the given handle from the function's name. Instead of renaming it to `session_join_close_worker_thread`, might as well open code it.
Yeah the function is pretty short at this point.
On Mon Jan 16 16:20:40 2023 +0000, Jinoh Kang wrote:
From https://learn.microsoft.com/en-us/uwp/api/windows.media.speechrecognition.sp...:
**Idle**: 0
Indicates that speech recognition is not active and the speech
recognizer is not capturing (listening for) audio input.
In this state, [SpeechRecognizer.RecognizeAsync],
[SpeechRecognizer.RecognizeWithUIAsync], **[SpeechContinuousRecognitionSession.StartAsync]**, or [SpeechRecognizer.CompileConstraintsAsync] can be called. (emphasis mine) A successful call to `StartAsync` requires that `worker_running` is true as a precondition. However, the `get_State` implementation returns `recognizer_state` as-is. Therefore, `worker_running && recognizer_state == SpeechRecognizerState_Idle` is an invalid state combination. Something like the following would make more sense:
impl->recognizer_state = SpeechRecognizerState_Capturing;
Yes, that's a mistake.
On Mon Jan 16 16:20:41 2023 +0000, Jinoh Kang wrote:
This changes the `recognizer_state` regardless of whether the thread creation succeeds or not.
if ((impl->worker_thread = CreateThread(NULL, 0, session_worker_thread_cb, impl, 0, NULL))) { impl->worker_running = TRUE; impl->recognizer_state = SpeechRecognizerState_Capturing; } else { hr = HRESULT_FROM_WIN32(GetLastError()); }
However, in general this function should be improved in a direction that reduces nested `if`s. You can also replace `invalid_state = TRUE` with `hr = COR_E_INVALIDOPERATION`. Nested `if`s can be replaced by successive `if (SUCCEEDED(hr) [&& ...] )` statements.
Yeah, the function developed in an ugly direction.
On Mon Jan 16 16:20:41 2023 +0000, Jinoh Kang wrote:
`async_action_create` should be taken out of the critical section. In general, minimize the number of potentially failing or blocking operations inside a critical section (ideally to zero).
I didn't really came up with something better, that's why it's like that.
On Mon Jan 16 17:13:55 2023 +0000, Jinoh Kang wrote:
Hi Bernhard, could you take a look at my review? You can take your time, especially since the code freeze has not yet ended. (Speaking of the code freeze, the previous MR should have probably stayed Draft.)
Well, I undrafted it because it's basically ready and it's not too subtle that it's a feature change.
On Mon Jan 16 16:21:26 2023 +0000, Jinoh Kang wrote:
Ditto. Something like (with appropriate defines):
wfx->wBitsPerSample = WINE_VOSK_BITS_PER_SAMPLE; wfx->nBlockAlign = (wfx->wBitsPerSample + 7) / 8 * wfx->nChannels;
I personally find this a bit confusing, not sure if it makes sense to do. @rbernon What's your opinion?
On Thu Jan 19 15:48:27 2023 +0000, Bernhard Kölbl wrote:
I personally find this a bit confusing, not sure if it makes sense to do. @rbernon What's your opinion?
I think you can actually omit `+ 7` here.
On Thu Jan 19 15:50:54 2023 +0000, Jinoh Kang wrote:
I think you can actually omit `+ 7` here.
For consistency with existing code, you can say `wfx->nChannels * wfx->wBitsPerSample / 8`.
On Mon Jan 16 16:24:07 2023 +0000, Jinoh Kang wrote:
Note that `IAudioClient` may already have pending audio data in buffer at the time `IAudioClient_Stop` is called. Ideally `PauseAsync` should wait until the capture buffer is flushed, but let's skip that for this MR.
It wasn't an issue so far, maybe we can flush it on the pause signal edge?
On Mon Jan 16 16:20:41 2023 +0000, Jinoh Kang wrote:
As per the review comment below, replace `2` with something like `(WINE_VOSK_BITS_PER_SAMPLE + 7) / 8`.
Actually, wouldn't `sizeof(*tmp_buf) * frame_count * wfx->nBlockAlign` work as well?