[PATCH 4/8] sapi: Partially implement ISpVoice::Speak.

14 Jul 2023

From: Shaun Ren sren@codeweavers.com
Introduce ISpTTSEngineSite, which is passed to the TTS engine during
speech synthesis.
---
 dlls/sapi/async.c     |   3 +
 dlls/sapi/tests/tts.c |  14 ++
 dlls/sapi/tts.c       | 324 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 338 insertions(+), 3 deletions(-)

diff --git a/dlls/sapi/async.c b/dlls/sapi/async.c
index 57ae89ad723..491ca657c1a 100644
--- a/dlls/sapi/async.c
+++ b/dlls/sapi/async.c
@@ -52,6 +52,8 @@ void async_empty_queue(struct async_queue *queue)
 {
     struct async_task *task, *next;
+    if (!queue->init) return;
+
     EnterCriticalSection(&queue->cs);
     LIST_FOR_EACH_ENTRY_SAFE(task, next, &queue->tasks, struct async_task, entry)
     {
@@ -163,6 +165,7 @@ HRESULT async_queue_task(struct async_queue *queue, struct async_task *task)
     list_add_tail(&queue->tasks, &task->entry);
     LeaveCriticalSection(&queue->cs);
+    ResetEvent(queue->empty);
     SetEvent(queue->wait);
return S_OK;
diff --git a/dlls/sapi/tests/tts.c b/dlls/sapi/tests/tts.c
index 534e5842f39..ffe2e5f73a7 100644
--- a/dlls/sapi/tests/tts.c
+++ b/dlls/sapi/tests/tts.c
@@ -38,6 +38,7 @@ static void test_interfaces(void)
 {
     ISpeechVoice *speech_voice, *speech_voice2;
     IConnectionPointContainer *container;
+    ISpTTSEngineSite *site;
     ISpVoice *spvoice, *spvoice2;
     IDispatch *dispatch;
     IUnknown *unk;
@@ -90,6 +91,10 @@ static void test_interfaces(void)
     EXPECT_REF(container, 2);
     IConnectionPointContainer_Release(container);
+    hr = ISpeechVoice_QueryInterface(speech_voice, &IID_ISpTTSEngineSite,
+                                     (void **)&site);
+    ok(hr == E_NOINTERFACE, "ISpeechVoice_QueryInterface for ISpTTSEngineSite returned: %#lx.\n", hr);
+
     ISpeechVoice_Release(speech_voice);
 }
@@ -102,6 +107,7 @@ static void test_spvoice(void)
     WCHAR *token_id = NULL, *default_token_id = NULL;
     LONG rate;
     USHORT volume;
+    ULONG stream_num;
     HRESULT hr;
if (waveOutGetNumDevs() == 0) {
@@ -204,6 +210,14 @@ static void test_spvoice(void)
     hr = ISpVoice_SetVolume(voice, 101);
     ok(hr == E_INVALIDARG, "got %#lx.\n", hr);
+    hr = ISpVoice_Speak(voice, NULL, SPF_PURGEBEFORESPEAK, NULL);
+    ok(hr == S_OK, "got %#lx.\n", hr);
+
+    stream_num = 0xdeadbeef;
+    hr = ISpVoice_Speak(voice, NULL, SPF_PURGEBEFORESPEAK, &stream_num);
+    ok(hr == S_OK, "got %#lx.\n", hr);
+    ok(stream_num == 0xdeadbeef, "got %lu.\n", stream_num);
+
     ISpVoice_Release(voice);
     ISpMMSysAudio_Release(audio_out);
 }
diff --git a/dlls/sapi/tts.c b/dlls/sapi/tts.c
index b0b3bbd2eff..7acbfba04cf 100644
--- a/dlls/sapi/tts.c
+++ b/dlls/sapi/tts.c
@@ -27,6 +27,7 @@
 #include "objbase.h"
#include "sapiddk.h"
+#include "sperror.h"
#include "wine/debug.h"
@@ -43,8 +44,10 @@ struct speech_voice
ISpStreamFormat *output;
     ISpTTSEngine *engine;
+    ULONG cur_stream_num;
     USHORT volume;
     LONG rate;
+    struct async_queue queue;
     CRITICAL_SECTION cs;
 };
@@ -63,6 +66,20 @@ static inline struct speech_voice *impl_from_IConnectionPointContainer(IConnecti
     return CONTAINING_RECORD(iface, struct speech_voice, IConnectionPointContainer_iface);
 }
+/* Site object that is passed to the TTS engine during speech synthesis. */
+struct tts_engine_site
+{
+    ISpTTSEngineSite ISpTTSEngineSite_iface; /* COM interface embedded in the object. */
+    LONG ref;                                /* COM reference count; set to 1 on creation. */
+
+    struct speech_voice *voice;              /* Voice this site was created for; the site holds a reference on it. */
+    ULONG stream_num;                        /* Stream number given to ttsenginesite_create(). */
+};
+
+/* Recover the tts_engine_site object that embeds the given ISpTTSEngineSite interface. */
+static inline struct tts_engine_site *impl_from_ISpTTSEngineSite(ISpTTSEngineSite *iface)
+{
+    struct tts_engine_site *site = CONTAINING_RECORD(iface, struct tts_engine_site, ISpTTSEngineSite_iface);
+
+    return site;
+}
+
 static HRESULT create_default_token(const WCHAR *cat_id, ISpObjectToken **token)
 {
     ISpObjectTokenCategory *cat;
@@ -143,6 +160,7 @@ static ULONG WINAPI speech_voice_Release(ISpeechVoice *iface)
if (!ref)
     {
+        async_cancel_queue(&This->queue);
         if (This->output) ISpStreamFormat_Release(This->output);
         if (This->engine) ISpTTSEngine_Release(This->engine);
         DeleteCriticalSection(&This->cs);
@@ -697,11 +715,170 @@ static HRESULT WINAPI spvoice_GetVoice(ISpVoice *iface, ISpObjectToken **token)
     return hr;
 }
-static HRESULT WINAPI spvoice_Speak(ISpVoice *iface, const WCHAR *contents, DWORD flags, ULONG *number)
+/* Completion record for a synchronous Speak call: the worker stores the result
+ * in hr and then signals done, which the calling thread waits on. */
+struct async_result
 {
-    FIXME("(%p, %p, %#lx, %p): stub.\n", iface, contents, flags, number);
+    HANDLE done; /* Event set by speak_proc() after hr has been written. */
+    HRESULT hr;  /* Result of the speak task; initialised to E_FAIL by the caller. */
+};
+
+/* Work item queued by spvoice_Speak() and run by speak_proc(). */
+struct speak_task
+{
+    struct async_task task;      /* Must stay the first member: the code casts between struct async_task * and struct speak_task *. */
+    struct async_result *result; /* Set for synchronous calls only; NULL when SPF_ASYNC was requested. */
+
+    struct speech_voice *voice;  /* Voice the Speak call was made on. */
+    SPVTEXTFRAG *frag_list;      /* Text fragment to speak; the text copy is stored directly behind it. */
+    ISpTTSEngineSite *site;      /* Engine site created for this stream. */
+    DWORD flags;                 /* Speak flags kept for the task; only SPF_NLP_SPEAK_PUNC is passed on. */
+};
+
+/* Queue callback for a speak task.  Synthesis is not implemented yet: the task
+ * only reports S_OK to a synchronous caller, if there is one.
+ * NOTE(review): frag_list and site are not released here -- confirm who owns
+ * them once the task has run. */
+static void speak_proc(struct async_task *task)
+{
+    struct speak_task *speak_task = (struct speak_task *)task;
+    struct async_result *result;
+
+    FIXME("(%p): stub.\n", task);
+
+    /* No result record means nobody is waiting for this task. */
+    if (!(result = speak_task->result))
+        return;
+
+    /* Publish the result before waking up the waiter. */
+    result->hr = S_OK;
+    SetEvent(result->done);
+}
+
+static HRESULT ttsenginesite_create(struct speech_voice *voice, ULONG stream_num, ISpTTSEngineSite **site);
+
+/*
+ * ISpVoice::Speak (partial implementation).
+ *
+ * Queues a speak task for the given text and, unless SPF_ASYNC is set, blocks
+ * until the task has run.
+ *
+ * iface          - the voice.
+ * contents       - NUL-terminated text.  May be NULL or empty only together with
+ *                  SPF_PURGEBEFORESPEAK, in which case the call just purges.
+ * flags          - SPF_IS_NOT_XML is ignored; SPF_ASYNC, SPF_PURGEBEFORESPEAK and
+ *                  SPF_NLP_SPEAK_PUNC are accepted; any other flag is rejected.
+ * stream_num_out - optional; receives the stream number once the task has been
+ *                  queued.  Left untouched on failure and on a purge-only call.
+ *
+ * Returns S_OK for a purge-only or asynchronous call, the task's result for a
+ * synchronous call, E_NOTIMPL for unsupported flags, E_POINTER for NULL contents
+ * without SPF_PURGEBEFORESPEAK, E_OUTOFMEMORY on allocation failure, or the
+ * failure code of the helper that failed.
+ */
+static HRESULT WINAPI spvoice_Speak(ISpVoice *iface, const WCHAR *contents, DWORD flags, ULONG *stream_num_out)
+{
+    struct speech_voice *This = impl_from_ISpVoice(iface);
+    ISpTTSEngineSite *site = NULL;
+    SPVTEXTFRAG *frag;
+    struct speak_task *speak_task = NULL;
+    struct async_result *result = NULL;
+    size_t contents_len, contents_size;
+    ULONG stream_num;
+    HRESULT hr;
+
+    TRACE("(%p, %p, %#lx, %p).\n", iface, contents, flags, stream_num_out);
+
+    flags &= ~SPF_IS_NOT_XML;
+    if (flags & ~(SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_NLP_SPEAK_PUNC))
+    {
+        FIXME("flags %#lx not implemented.\n", flags & ~(SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_NLP_SPEAK_PUNC));
+        return E_NOTIMPL;
+    }
+
+    if (flags & SPF_PURGEBEFORESPEAK)
+    {
+        ISpAudio *audio;
+
+        EnterCriticalSection(&This->cs);
+
+        /* Close the audio output, if the current output is an audio device. */
+        if (This->output && SUCCEEDED(ISpStreamFormat_QueryInterface(This->output, &IID_ISpAudio, (void **)&audio)))
+        {
+            ISpAudio_SetState(audio, SPAS_CLOSED, 0);
+            ISpAudio_Release(audio);
+        }
+
+        LeaveCriticalSection(&This->cs);
+
+        /* Drop every task that is still pending. */
+        async_empty_queue(&This->queue);
+
+        /* Purge-only call: nothing to speak, and no stream number is handed out. */
+        if (!contents || !*contents)
+            return S_OK;
+    }
+    else if (!contents)
+        return E_POINTER;
+
+    contents_len = wcslen(contents);
+    contents_size = sizeof(WCHAR) * (contents_len + 1);
+
+    TRACE("contents: %s.\n", debugstr_w(contents));
+
+    if (!This->output)
+    {
+        /* Create a new output stream with the default output. */
+        if (FAILED(hr = ISpVoice_SetOutput(iface, NULL, TRUE)))
+            return hr;
+    }
+
+    if (!This->engine)
+    {
+        /* Create a new engine with the default voice. */
+        if (FAILED(hr = ISpVoice_SetVoice(iface, NULL)))
+            return hr;
+    }
+
+    /* The fragment and its private copy of the text share one allocation;
+     * the text is stored directly behind the SPVTEXTFRAG header. */
+    if (!(frag = heap_alloc(sizeof(*frag) + contents_size)))
+        return E_OUTOFMEMORY;
+    memset(frag, 0, sizeof(*frag));
+    memcpy(frag + 1, contents, contents_size);
+    frag->State.eAction = SPVA_Speak;
+    frag->State.Volume  = 100;
+    frag->pTextStart    = (WCHAR *)(frag + 1);
+    frag->ulTextLen     = contents_len;
+    frag->ulTextSrcOffset = 0;
+
+    stream_num = InterlockedIncrement((LONG *)&This->cur_stream_num);
+    if (FAILED(hr = ttsenginesite_create(This, stream_num, &site)))
+    {
+        FIXME("Failed to create ttsenginesite: %#lx.\n", hr);
+        goto fail;
+    }
+
+    /* The allocation may fail; bail out before touching the task. */
+    if (!(speak_task = heap_alloc(sizeof(*speak_task))))
+    {
+        hr = E_OUTOFMEMORY;
+        goto fail;
+    }
+
+    speak_task->task.proc = speak_proc;
+    speak_task->result    = NULL;
+    speak_task->voice     = This;
+    speak_task->frag_list = frag;
+    speak_task->site      = site;
+    speak_task->flags     = flags & SPF_NLP_SPEAK_PUNC;
+
+    if (!(flags & SPF_ASYNC))
+    {
+        /* Synchronous call: set up the record the worker reports back through. */
+        if (!(result = heap_alloc(sizeof(*result))))
+        {
+            hr = E_OUTOFMEMORY;
+            goto fail;
+        }
+        result->hr = E_FAIL;
+        /* Without the event the wait below could never be satisfied. */
+        if (!(result->done = CreateEventW(NULL, FALSE, FALSE, NULL)))
+        {
+            hr = HRESULT_FROM_WIN32(GetLastError());
+            goto fail;
+        }
+        speak_task->result = result;
+    }
+
+    if (FAILED(hr = async_queue_task(&This->queue, (struct async_task *)speak_task)))
+    {
+        WARN("Failed to queue task: %#lx.\n", hr);
+        goto fail;
+    }
+
+    /* From here on the task belongs to the queue; only result is still ours. */
+    if (stream_num_out)
+        *stream_num_out = stream_num;
+
+    if (flags & SPF_ASYNC)
+        return S_OK;
+
+    WaitForSingleObject(result->done, INFINITE);
+    hr = result->hr;
+    CloseHandle(result->done);
+    heap_free(result);
+    return hr;
+
+fail:
+    if (site) ISpTTSEngineSite_Release(site);
+    heap_free(frag);
+    heap_free(speak_task);
+    if (result)
+    {
+        /* done is NULL when event creation itself was the failure. */
+        if (result->done) CloseHandle(result->done);
+        heap_free(result);
+    }
+    return hr;
-    return E_NOTIMPL;
 }
static HRESULT WINAPI spvoice_SpeakStream(ISpVoice *iface, IStream *stream, DWORD flags, ULONG *number)
@@ -894,6 +1071,145 @@ static const ISpVoiceVtbl spvoice_vtbl =
     spvoice_DisplayUI
 };
+/* ISpTTSEngineSite interface */
+/* IUnknown::QueryInterface for the engine site.  Only IUnknown and
+ * ISpTTSEngineSite are exposed; any other IID clears *obj and yields
+ * E_NOINTERFACE.  On success the returned interface carries a new reference. */
+static HRESULT WINAPI ttsenginesite_QueryInterface(ISpTTSEngineSite *iface, REFIID iid, void **obj)
+{
+    struct tts_engine_site *site = impl_from_ISpTTSEngineSite(iface);
+
+    TRACE("(%p, %s %p).\n", iface, debugstr_guid(iid), obj);
+
+    if (!IsEqualIID(iid, &IID_IUnknown) &&
+        !IsEqualIID(iid, &IID_ISpTTSEngineSite))
+    {
+        *obj = NULL;
+        FIXME("interface %s not implemented.\n", debugstr_guid(iid));
+        return E_NOINTERFACE;
+    }
+
+    *obj = &site->ISpTTSEngineSite_iface;
+    IUnknown_AddRef((IUnknown *)*obj);
+    return S_OK;
+}
+
+/* IUnknown::AddRef for the engine site: atomically bumps the reference count
+ * and returns the new value. */
+static ULONG WINAPI ttsenginesite_AddRef(ISpTTSEngineSite *iface)
+{
+    struct tts_engine_site *site = impl_from_ISpTTSEngineSite(iface);
+    ULONG count;
+
+    count = InterlockedIncrement(&site->ref);
+    TRACE("(%p): ref=%lu.\n", iface, count);
+
+    return count;
+}
+
+/* IUnknown::Release for the engine site: atomically drops one reference and
+ * returns the new count.  When the count reaches zero the reference held on
+ * the voice is released and the site is freed. */
+static ULONG WINAPI ttsenginesite_Release(ISpTTSEngineSite *iface)
+{
+    struct tts_engine_site *site = impl_from_ISpTTSEngineSite(iface);
+    ULONG count = InterlockedDecrement(&site->ref);
+
+    TRACE("(%p): ref=%lu.\n", iface, count);
+
+    /* Still referenced elsewhere: nothing to tear down. */
+    if (count)
+        return count;
+
+    if (site->voice)
+        ISpeechVoice_Release(&site->voice->ISpeechVoice_iface);
+    heap_free(site);
+
+    return 0;
+}
+
+/* ISpTTSEngineSite::AddEvents stub: logs the call, ignores the events and
+ * reports success. */
+static HRESULT WINAPI ttsenginesite_AddEvents(ISpTTSEngineSite *iface, const SPEVENT *events, ULONG count)
+{
+    /* count is a ULONG, so it is printed unsigned like the other ULONG traces in this file. */
+    FIXME("(%p, %p, %lu): stub.\n", iface, events, count);
+
+    return S_OK;
+}
+
+/* ISpTTSEngineSite::GetEventInterest stub: logs the call, leaves *interest
+ * untouched and returns E_NOTIMPL. */
+static HRESULT WINAPI ttsenginesite_GetEventInterest(ISpTTSEngineSite *iface, ULONGLONG *interest)
+{
+    FIXME("(%p, %p): stub.\n", iface, interest);
+
+    return E_NOTIMPL;
+}
+
+/* ISpTTSEngineSite::GetActions stub: logs the call and always reports
+ * SPVES_CONTINUE. */
+static DWORD WINAPI ttsenginesite_GetActions(ISpTTSEngineSite *iface)
+{
+    FIXME("(%p): stub.\n", iface);
+
+    return SPVES_CONTINUE;
+}
+
+/* ISpTTSEngineSite::Write stub: logs the call, consumes nothing, leaves
+ * *cb_written untouched and returns E_NOTIMPL. */
+static HRESULT WINAPI ttsenginesite_Write(ISpTTSEngineSite *iface, const void *buf, ULONG cb, ULONG *cb_written)
+{
+    /* cb is a ULONG, so it is printed unsigned like the other ULONG traces in this file. */
+    FIXME("(%p, %p, %lu, %p): stub.\n", iface, buf, cb, cb_written);
+
+    return E_NOTIMPL;
+}
+
+/* ISpTTSEngineSite::GetRate stub: logs the call, leaves *rate untouched and
+ * returns E_NOTIMPL. */
+static HRESULT WINAPI ttsenginesite_GetRate(ISpTTSEngineSite *iface, long *rate)
+{
+    FIXME("(%p, %p): stub.\n", iface, rate);
+
+    return E_NOTIMPL;
+}
+
+/* ISpTTSEngineSite::GetVolume stub: logs the call, leaves *volume untouched
+ * and returns E_NOTIMPL. */
+static HRESULT WINAPI ttsenginesite_GetVolume(ISpTTSEngineSite *iface, USHORT *volume)
+{
+    FIXME("(%p, %p): stub.\n", iface, volume);
+
+    return E_NOTIMPL;
+}
+
+/* ISpTTSEngineSite::GetSkipInfo stub: logs the call, writes neither output
+ * parameter and returns E_NOTIMPL. */
+static HRESULT WINAPI ttsenginesite_GetSkipInfo(ISpTTSEngineSite *iface, SPVSKIPTYPE *type, long *skip_count)
+{
+    FIXME("(%p, %p, %p): stub.\n", iface, type, skip_count);
+
+    return E_NOTIMPL;
+}
+
+/* ISpTTSEngineSite::CompleteSkip stub: logs the call and returns E_NOTIMPL. */
+static HRESULT WINAPI ttsenginesite_CompleteSkip(ISpTTSEngineSite *iface, long num_skipped)
+{
+    FIXME("(%p, %ld): stub.\n", iface, num_skipped);
+
+    return E_NOTIMPL;
+}
+
+/* Method table installed on every tts_engine_site by ttsenginesite_create().
+ * The storage class comes first, matching the other vtbl in this file. */
+static const ISpTTSEngineSiteVtbl ttsenginesite_vtbl =
+{
+    ttsenginesite_QueryInterface,
+    ttsenginesite_AddRef,
+    ttsenginesite_Release,
+    ttsenginesite_AddEvents,
+    ttsenginesite_GetEventInterest,
+    ttsenginesite_GetActions,
+    ttsenginesite_Write,
+    ttsenginesite_GetRate,
+    ttsenginesite_GetVolume,
+    ttsenginesite_GetSkipInfo,
+    ttsenginesite_CompleteSkip
+};
+
+/* Allocate an engine site for the given voice and stream number.
+ * The new site starts with one reference and takes a reference on the voice.
+ * Returns E_OUTOFMEMORY, leaving *site untouched, if the allocation fails;
+ * S_OK otherwise. */
+static HRESULT ttsenginesite_create(struct speech_voice *voice, ULONG stream_num, ISpTTSEngineSite **site)
+{
+    struct tts_engine_site *object;
+
+    if (!(object = heap_alloc(sizeof(*object))))
+        return E_OUTOFMEMORY;
+
+    /* This reference is dropped again when the site's last reference goes away. */
+    ISpeechVoice_AddRef(&voice->ISpeechVoice_iface);
+
+    object->ISpTTSEngineSite_iface.lpVtbl = &ttsenginesite_vtbl;
+    object->ref        = 1;
+    object->voice      = voice;
+    object->stream_num = stream_num;
+
+    *site = &object->ISpTTSEngineSite_iface;
+    return S_OK;
+}
+
 /* IConnectionPointContainer interface */
 static HRESULT WINAPI container_QueryInterface(IConnectionPointContainer *iface, REFIID iid, void **obj)
 {
@@ -960,8 +1276,10 @@ HRESULT speech_voice_create(IUnknown *outer, REFIID iid, void **obj)
This->output = NULL;
     This->engine = NULL;
+    This->cur_stream_num = 0;
     This->volume = 100;
     This->rate = 0;
+    memset(&This->queue, 0, sizeof(This->queue));
InitializeCriticalSection(&This->cs);
-- 
GitLab


https://gitlab.winehq.org/wine/wine/-/merge_requests/3328

    

2025

2024

2023

2022

[PATCH 4/8] sapi: Partially implement ISpVoice::Speak.