[PATCH 0/4] MR8571: sapi/tts: Implement TTS engine audio output resampler.

List overview All Threads

newer

older

Re: [PATCH v4 0/2] MR8511: ole32:...

[PATCH 0/1] MR8579: winemac:...

Shaun Ren (＠shaunren)

15 Jul 2025

12:52 a.m.

Used by Diablo IV's screen reader.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/8571

Show replies by date

Shaun Ren

15 Jul

12:52 a.m.

New subject: [PATCH 1/4] sapi/stream: Remove the FIXME message for unknown ISpStream interfaces.

From: Shaun Ren <sren@codeweavers.com>

SpVoice may query ISpStream objects for an ISpAudio interface, which currently generates unnecessary FIXME log messages. --- dlls/sapi/stream.c | 1 - 1 file changed, 1 deletion(-)

diff --git a/dlls/sapi/stream.c b/dlls/sapi/stream.c index 41826b8c949..6a43128acd5 100644 --- a/dlls/sapi/stream.c +++ b/dlls/sapi/stream.c @@ -66,7 +66,6 @@ static HRESULT WINAPI spstream_QueryInterface(ISpStream *iface, REFIID iid, void else { *obj = NULL; - FIXME("interface %s not implemented.\n", debugstr_guid(iid)); return E_NOINTERFACE; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/8571

Shaun Ren

12:52 a.m.

New subject: [PATCH 2/4] sapi/tests: Test resampler support in ISpVoice.

From: Shaun Ren <sren@codeweavers.com>

--- dlls/sapi/tests/tts.c | 149 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 135 insertions(+), 14 deletions(-)

diff --git a/dlls/sapi/tests/tts.c b/dlls/sapi/tests/tts.c index 43f8f93de1f..4d3f32dc36b 100644 --- a/dlls/sapi/tests/tts.c +++ b/dlls/sapi/tests/tts.c @@ -18,8 +18,13 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */

+#include <stdint.h> +#include <math.h> + #define COBJMACROS

+#include "objbase.h" + #include "sapiddk.h" #include "sperror.h"

@@ -141,6 +146,8 @@ static void test_interfaces(void) #define TESTENGINE_CLSID L"{57C7E6B1-2FC2-4E8E-B968-1410A39E7198}" static const GUID CLSID_TestEngine = {0x57C7E6B1,0x2FC2,0x4E8E,{0xB9,0x68,0x14,0x10,0xA3,0x9E,0x71,0x98}};

+static const unsigned int test_engine_sample_rate = 22050; + struct test_engine { ISpTTSEngine ISpTTSEngine_iface; @@ -148,7 +155,10 @@ struct test_engine

ISpObjectToken *token;

- BOOL simulate_output; + + const char *output_data; + size_t output_len; + BOOL speak_called; DWORD flags; GUID fmtid; @@ -204,7 +214,8 @@ static void copy_frag_list(const SPVTEXTFRAG *frag_list, SPVTEXTFRAG **ret_frags

static void reset_engine_params(struct test_engine *engine) { - engine->simulate_output = FALSE; + engine->output_data = NULL; + engine->output_len = 0; engine->speak_called = FALSE; engine->flags = 0xdeadbeef; memset(&engine->fmtid, 0xde, sizeof(engine->fmtid)); @@ -216,6 +227,32 @@ static void reset_engine_params(struct test_engine *engine) engine->frag_count = 0; }

+static char *make_sin_data(int sin_freq, size_t time_ms, size_t sample_rate, size_t *len) +{ + double ang_freq; + char *data; + size_t i; + int val; + + *len = sample_rate * sizeof(int16_t) * time_ms / 1000; + if (!(data = malloc(*len))) + return NULL; + + if (!sin_freq) + { + memset(data, 0, *len); + return data; + } + + ang_freq = 2 * M_PI * sin_freq / sample_rate; + for (i = 0; i < *len / sizeof(int16_t); i++) + { + val = floor(32768 * sin(ang_freq * i) + 0.5); + ((int16_t *)data)[i] = min(max(-32768, val), 32767); + } + return data; +} + static inline struct test_engine *impl_from_ISpTTSEngine(ISpTTSEngine *iface) { return CONTAINING_RECORD(iface, struct test_engine, ISpTTSEngine_iface); @@ -258,9 +295,11 @@ static HRESULT WINAPI test_engine_Speak(ISpTTSEngine *iface, DWORD flags, REFGUI const WAVEFORMATEX *wfx, const SPVTEXTFRAG *frag_list, ISpTTSEngineSite *site) { + static const int num_out_iters = 5; + struct test_engine *engine = impl_from_ISpTTSEngine(iface); + size_t out_iter_len; DWORD actions; - char *buf; int i; HRESULT hr;

@@ -282,19 +321,18 @@ static HRESULT WINAPI test_engine_Speak(ISpTTSEngine *iface, DWORD flags, REFGUI actions = ISpTTSEngineSite_GetActions(site); ok(actions == SPVES_CONTINUE, "got %#lx.\n", actions);

- if (!engine->simulate_output) + if (!engine->output_len) return S_OK;

- buf = calloc(1, 22050 * 2 / 5); - for (i = 0; i < 5; i++) + out_iter_len = engine->output_len / num_out_iters; + for (i = 0; i < num_out_iters; i++) { if (ISpTTSEngineSite_GetActions(site) & SPVES_ABORT) break; - hr = ISpTTSEngineSite_Write(site, buf, 22050 * 2 / 5, NULL); + hr = ISpTTSEngineSite_Write(site, engine->output_data + i * out_iter_len, out_iter_len, NULL); ok(hr == S_OK || hr == SP_AUDIO_STOPPED, "got %#lx.\n", hr); - Sleep(100); + Sleep(20); } - free(buf);

return S_OK; } @@ -307,10 +345,10 @@ static HRESULT WINAPI test_engine_GetOutputFormat(ISpTTSEngine *iface, const GUI *out_wfx = CoTaskMemAlloc(sizeof(WAVEFORMATEX)); (*out_wfx)->wFormatTag = WAVE_FORMAT_PCM; (*out_wfx)->nChannels = 1; - (*out_wfx)->nSamplesPerSec = 22050; + (*out_wfx)->nSamplesPerSec = test_engine_sample_rate; (*out_wfx)->wBitsPerSample = 16; (*out_wfx)->nBlockAlign = 2; - (*out_wfx)->nAvgBytesPerSec = 22050 * 2; + (*out_wfx)->nAvgBytesPerSec = test_engine_sample_rate * 2; (*out_wfx)->cbSize = 0;

return S_OK; @@ -458,6 +496,12 @@ static void test_spvoice(void) USHORT volume; ULONG stream_num; DWORD regid; + WAVEFORMATEX wfx; + ISpStream *spstream; + IStream *mem_stream; + char *wave_data = NULL; + size_t wave_len = 0; + STATSTG statstg; DWORD start, duration; ISpeechVoice *speech_voice; ISpeechObjectTokens *speech_tokens; @@ -470,6 +514,7 @@ static void test_spvoice(void) DISPID dispid; DISPPARAMS params; VARIANT args[2], ret; + int i; HRESULT hr;

if (waveOutGetNumDevs() == 0) { @@ -656,8 +701,12 @@ static void test_spvoice(void) ISpVoice_SetRate(voice, 0); ISpVoice_SetVolume(voice, 100);

+ wave_data = make_sin_data(0, 1000, test_engine_sample_rate, &wave_len); + reset_engine_params(&test_engine); - test_engine.simulate_output = TRUE; + test_engine.output_data = wave_data; + test_engine.output_len = wave_len; + stream_num = 0xdeadbeef; start = GetTickCount(); hr = ISpVoice_Speak(voice, test_text, SPF_DEFAULT, &stream_num); @@ -684,7 +733,9 @@ static void test_spvoice(void) ok(duration < 200, "took %lu ms.\n", duration);

reset_engine_params(&test_engine); - test_engine.simulate_output = TRUE; + test_engine.output_data = wave_data; + test_engine.output_len = wave_len; + stream_num = 0xdeadbeef; start = GetTickCount(); hr = ISpVoice_Speak(voice, test_text, SPF_DEFAULT | SPF_ASYNC | SPF_NLP_SPEAK_PUNC, &stream_num); @@ -710,7 +761,9 @@ static void test_spvoice(void) ok(test_engine.volume == 100, "got %d.\n", test_engine.volume);

reset_engine_params(&test_engine); - test_engine.simulate_output = TRUE; + test_engine.output_data = wave_data; + test_engine.output_len = wave_len; + hr = ISpVoice_Speak(voice, test_text, SPF_DEFAULT | SPF_ASYNC, NULL); ok(hr == S_OK, "got %#lx.\n", hr);

@@ -721,6 +774,73 @@ static void test_spvoice(void) ok(hr == S_OK, "got %#lx.\n", hr); ok(duration < 300, "took %lu ms.\n", duration);

+ free(wave_data); + wave_data = NULL; + + /* Test ISPVoice resampler */ + hr = CoCreateInstance(&CLSID_SpStream, NULL, CLSCTX_INPROC_SERVER, + &IID_ISpStream, (void **)&spstream); + ok(hr == S_OK, "Failed to create SpStream: %#lx.\n", hr); + + hr = CreateStreamOnHGlobal(NULL, TRUE, &mem_stream); + ok(hr == S_OK, "Failed to create memory stream: %#lx.\n", hr); + + wfx.wFormatTag = WAVE_FORMAT_PCM; + wfx.nChannels = 1; + wfx.nSamplesPerSec = 16000; + wfx.nAvgBytesPerSec = 16000 * 2; + wfx.nBlockAlign = 2; + wfx.wBitsPerSample = 16; + wfx.cbSize = 0; + + hr = ISpStream_SetBaseStream(spstream, mem_stream, &SPDFID_WaveFormatEx, &wfx); + ok(hr == S_OK, "got %#lx.\n", hr); + + hr = ISpVoice_SetOutput(voice, (IUnknown *)spstream, TRUE); + ok(hr == S_OK, "got %#lx.\n", hr); + + wave_data = make_sin_data(50, 200, test_engine_sample_rate, &wave_len); + reset_engine_params(&test_engine); + test_engine.output_data = wave_data; + test_engine.output_len = wave_len; + + hr = ISpVoice_Speak(voice, test_text, SPF_DEFAULT, NULL); + todo_wine ok(hr == S_OK, "got %#lx.\n", hr); + todo_wine ok(test_engine.speak_called, "ISpTTSEngine::Speak was not called.\n"); + + hr = ISpVoice_SetOutput(voice, NULL, TRUE); + ok(hr == S_OK, "got %#lx.\n", hr); + + free(wave_data); + wave_data = make_sin_data(50, 200, 16000, &wave_len); + + hr = IStream_Stat(mem_stream, &statstg, STATFLAG_DEFAULT); + ok(hr == S_OK, "got %#lx.\n", hr); + todo_wine ok(fabs((double)statstg.cbSize.QuadPart / wave_len - 1) < 0.02, + "got %I64u, expected %Iu (+/-2%%).\n", statstg.cbSize.QuadPart, wave_len); + + if (statstg.cbSize.QuadPart > 0) { + size_t check_len = min((size_t)statstg.cbSize.QuadPart, wave_len) / sizeof(int16_t); + unsigned int max_diff = 0; + const void *mem_data; + HGLOBAL mem_global; + + hr = GetHGlobalFromStream(mem_stream, &mem_global); + ok(hr == S_OK, "got %#lx.\n", hr); + + mem_data = GlobalLock(mem_global); + for (i = 0; i < check_len; i++) { + int out = ((int16_t *)mem_data)[i], exp = ((int16_t 
*)wave_data)[i]; + max_diff = max(max_diff, abs(out - exp)); + } + GlobalUnlock(mem_global); + + ok(max_diff < 32768 * 0.02, "got max_diff %u.\n", max_diff); + } + + ISpStream_Release(spstream); + IStream_Release(mem_stream); + hr = ISpVoice_QueryInterface(voice, &IID_ISpeechVoice, (void **)&speech_voice); ok(hr == S_OK, "got %#lx.\n", hr);

@@ -823,6 +943,7 @@ done: ISpVoice_Release(voice); ISpObjectToken_Release(token); ISpMMSysAudio_Release(audio_out); + free(wave_data); SysFreeString(req); SysFreeString(opt); }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/8571

Shaun Ren

12:52 a.m.

New subject: [PATCH 3/4] sapi/tts: Implement TTS engine audio output resampler.

From: Shaun Ren <sren@codeweavers.com>

This implementation uses the audio resampler DSP from Media Foundation. --- dlls/sapi/Makefile.in | 4 +- dlls/sapi/tests/tts.c | 8 +- dlls/sapi/tts.c | 183 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 178 insertions(+), 17 deletions(-)

diff --git a/dlls/sapi/Makefile.in b/dlls/sapi/Makefile.in index da91eca5b8c..8286ff8adc0 100644 --- a/dlls/sapi/Makefile.in +++ b/dlls/sapi/Makefile.in @@ -1,6 +1,6 @@ MODULE = sapi.dll -IMPORTS = uuid ole32 oleaut32 user32 advapi32 -DELAYIMPORTS = winmm +IMPORTS = uuid ole32 oleaut32 user32 advapi32 mfuuid wmcodecdspuuid +DELAYIMPORTS = winmm mfplat

SOURCES = \ async.c \ diff --git a/dlls/sapi/tests/tts.c b/dlls/sapi/tests/tts.c index 4d3f32dc36b..c89bffae5a0 100644 --- a/dlls/sapi/tests/tts.c +++ b/dlls/sapi/tests/tts.c @@ -805,8 +805,8 @@ static void test_spvoice(void) test_engine.output_len = wave_len;

hr = ISpVoice_Speak(voice, test_text, SPF_DEFAULT, NULL); - todo_wine ok(hr == S_OK, "got %#lx.\n", hr); - todo_wine ok(test_engine.speak_called, "ISpTTSEngine::Speak was not called.\n"); + ok(hr == S_OK, "got %#lx.\n", hr); + ok(test_engine.speak_called, "ISpTTSEngine::Speak was not called.\n");

hr = ISpVoice_SetOutput(voice, NULL, TRUE); ok(hr == S_OK, "got %#lx.\n", hr); @@ -816,8 +816,8 @@ static void test_spvoice(void)

hr = IStream_Stat(mem_stream, &statstg, STATFLAG_DEFAULT); ok(hr == S_OK, "got %#lx.\n", hr); - todo_wine ok(fabs((double)statstg.cbSize.QuadPart / wave_len - 1) < 0.02, - "got %I64u, expected %Iu (+/-2%%).\n", statstg.cbSize.QuadPart, wave_len); + ok(fabs((double)statstg.cbSize.QuadPart / wave_len - 1) < 0.02, + "got %I64u, expected %Iu.\n", statstg.cbSize.QuadPart, wave_len);

if (statstg.cbSize.QuadPart > 0) { size_t check_len = min((size_t)statstg.cbSize.QuadPart, wave_len) / sizeof(int16_t); diff --git a/dlls/sapi/tts.c b/dlls/sapi/tts.c index baf7a74789d..54537d927be 100644 --- a/dlls/sapi/tts.c +++ b/dlls/sapi/tts.c @@ -26,6 +26,11 @@ #include "winbase.h" #include "objbase.h"

+#include "mfapi.h" +#include "mferror.h" +#include "mftransform.h" +#include "wmcodecdsp.h" + #include "sapiddk.h" #include "sperror.h"

@@ -43,6 +48,7 @@ struct speech_voice LONG ref;

ISpStreamFormat *output; + IMFTransform *resampler; ISpObjectToken *engine_token; ISpTTSEngine *engine; LONG cur_stream_num; @@ -76,6 +82,9 @@ struct tts_engine_site

struct speech_voice *voice; ULONG stream_num; + BOOL use_resampler; + IMFSample *out_sample; + IMFMediaBuffer *out_buf; };

static inline struct tts_engine_site *impl_from_ISpTTSEngineSite(ISpTTSEngineSite *iface) @@ -83,6 +92,23 @@ static inline struct tts_engine_site *impl_from_ISpTTSEngineSite(ISpTTSEngineSit return CONTAINING_RECORD(iface, struct tts_engine_site, ISpTTSEngineSite_iface); }

+static const char *debugstr_wfx(const WAVEFORMATEX *wfx) +{ + if (!wfx) return "(null)"; + if (wfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) + { + const WAVEFORMATEXTENSIBLE *wfxe = (const WAVEFORMATEXTENSIBLE *)wfx; + + return wine_dbg_sprintf( + "tag: %#x (%s), ch: %u (mask: %#lx), rate: %lu, avgbps: %lu, align: %u, depth: %u", + wfx->wFormatTag, debugstr_guid(&wfxe->SubFormat), wfx->nChannels, wfxe->dwChannelMask, + wfx->nSamplesPerSec, wfx->nAvgBytesPerSec, wfx->nBlockAlign, wfx->wBitsPerSample); + } + return wine_dbg_sprintf("tag: %#x, ch: %u, rate: %lu, avgbps: %lu, align: %u, depth: %u", + wfx->wFormatTag, wfx->nChannels, wfx->nSamplesPerSec, + wfx->nAvgBytesPerSec, wfx->nBlockAlign, wfx->wBitsPerSample); +} + static HRESULT create_token_category(const WCHAR *cat_id, ISpObjectTokenCategory **cat) { HRESULT hr; @@ -170,6 +196,7 @@ static ULONG WINAPI speech_voice_Release(ISpeechVoice *iface) { async_cancel_queue(&This->queue); if (This->output) ISpStreamFormat_Release(This->output); + if (This->resampler) IMFTransform_Release(This->resampler); if (This->engine_token) ISpObjectToken_Release(This->engine_token); if (This->engine) ISpTTSEngine_Release(This->engine); DeleteCriticalSection(&This->cs); @@ -824,16 +851,68 @@ static void free_frag_list(SPVTEXTFRAG *frag) } }

-static HRESULT set_output_format(ISpStreamFormat *output, ISpTTSEngine *engine, GUID *fmtid, WAVEFORMATEX **wfx) +static HRESULT setup_resampler(struct speech_voice *voice, const WAVEFORMATEX *in_wfx, + const WAVEFORMATEX *out_wfx) +{ + IMFMediaType *cur_in_type = NULL, *cur_out_type = NULL; + IMFMediaType *in_type = NULL, *out_type = NULL; + DWORD flags; + HRESULT hr; + + TRACE("Resampling TTS engine output\n"); + TRACE(" in_wfx: %s\n", debugstr_wfx(in_wfx)); + TRACE("to\n"); + TRACE(" out_wfx: %s\n", debugstr_wfx(out_wfx)); + + if (!voice->resampler && + FAILED(hr = CoCreateInstance(&CLSID_CResamplerMediaObject, NULL, CLSCTX_INPROC_SERVER, + &IID_IMFTransform, (void **)&voice->resampler))) + { + ERR("Failed to create CResamplerMediaObject: %#lx.\n", hr); + return hr; + } + + if (FAILED(hr = MFCreateMediaType(&in_type)) || + FAILED(hr = MFInitMediaTypeFromWaveFormatEx(in_type, in_wfx, sizeof(WAVEFORMATEX) + in_wfx->cbSize))) + goto done; + + if (FAILED(hr = MFCreateMediaType(&out_type)) || + FAILED(hr = MFInitMediaTypeFromWaveFormatEx(out_type, out_wfx, sizeof(WAVEFORMATEX) + out_wfx->cbSize))) + goto done; + + if (FAILED(IMFTransform_GetInputCurrentType(voice->resampler, 0, &cur_in_type)) || + IMFMediaType_IsEqual(cur_in_type, in_type, &flags) != S_OK) + { + if (FAILED(hr = IMFTransform_SetInputType(voice->resampler, 0, in_type, 0))) + goto done; + } + + if (FAILED(IMFTransform_GetOutputCurrentType(voice->resampler, 0, &cur_out_type)) || + IMFMediaType_IsEqual(cur_out_type, out_type, &flags) != S_OK) + { + if (FAILED(hr = IMFTransform_SetOutputType(voice->resampler, 0, out_type, 0))) + goto done; + } + +done: + if (in_type) IMFMediaType_Release(in_type); + if (out_type) IMFMediaType_Release(out_type); + if (cur_in_type) IMFMediaType_Release(cur_in_type); + if (cur_out_type) IMFMediaType_Release(cur_out_type); + return hr; +} + +static HRESULT set_output_format(struct speech_voice *voice, GUID *fmtid, WAVEFORMATEX **engine_wfx, + BOOL *use_resampler) { GUID 
output_fmtid; WAVEFORMATEX *output_wfx = NULL; ISpAudio *audio = NULL; HRESULT hr;

- if (FAILED(hr = ISpStreamFormat_GetFormat(output, &output_fmtid, &output_wfx))) + if (FAILED(hr = ISpStreamFormat_GetFormat(voice->output, &output_fmtid, &output_wfx))) return hr; - if (FAILED(hr = ISpTTSEngine_GetOutputFormat(engine, &output_fmtid, output_wfx, fmtid, wfx))) + if (FAILED(hr = ISpTTSEngine_GetOutputFormat(voice->engine, &output_fmtid, output_wfx, fmtid, engine_wfx))) goto done; if (!IsEqualGUID(fmtid, &SPDFID_WaveFormatEx)) { @@ -841,12 +920,22 @@ static HRESULT set_output_format(ISpStreamFormat *output, ISpTTSEngine *engine, goto done; }

- if (memcmp(output_wfx, *wfx, sizeof(WAVEFORMATEX)) || - memcmp(output_wfx + 1, *wfx + 1, output_wfx->cbSize)) + *use_resampler = FALSE; + + if (memcmp(output_wfx, *engine_wfx, sizeof(WAVEFORMATEX)) || + memcmp(output_wfx + 1, *engine_wfx + 1, output_wfx->cbSize)) { - if (FAILED(hr = ISpStreamFormat_QueryInterface(output, &IID_ISpAudio, (void **)&audio)) || - FAILED(hr = ISpAudio_SetFormat(audio, &SPDFID_WaveFormatEx, *wfx))) - goto done; + if (SUCCEEDED(ISpStreamFormat_QueryInterface(voice->output, &IID_ISpAudio, (void **)&audio))) + { + hr = ISpAudio_SetFormat(audio, &SPDFID_WaveFormatEx, *engine_wfx); + if (hr == SPERR_UNSUPPORTED_FORMAT) + *use_resampler = TRUE; + } + else + *use_resampler = TRUE; + + if (*use_resampler) + hr = setup_resampler(voice, *engine_wfx, output_wfx); }

done: @@ -859,6 +948,7 @@ static void speak_proc(struct async_task *task) { struct speak_task *speak_task = (struct speak_task *)task; struct speech_voice *This = speak_task->voice; + struct tts_engine_site *site = impl_from_ISpTTSEngineSite(speak_task->site); GUID fmtid; WAVEFORMATEX *wfx = NULL; ISpAudio *audio = NULL; @@ -875,7 +965,7 @@ static void speak_proc(struct async_task *task) goto done; }

- if (FAILED(hr = set_output_format(This->output, This->engine, &fmtid, &wfx))) + if (FAILED(hr = set_output_format(This, &fmtid, &wfx, &site->use_resampler))) { LeaveCriticalSection(&This->cs); ERR("failed setting output format: %#lx.\n", hr); @@ -1356,8 +1446,9 @@ static ULONG WINAPI ttsenginesite_Release(ISpTTSEngineSite *iface)

if (!ref) { - if (This->voice) - ISpeechVoice_Release(&This->voice->ISpeechVoice_iface); + if (This->voice) ISpeechVoice_Release(&This->voice->ISpeechVoice_iface); + if (This->out_sample) IMFSample_Release(This->out_sample); + if (This->out_buf) IMFMediaBuffer_Release(This->out_buf); free(This); }

@@ -1392,6 +1483,69 @@ static DWORD WINAPI ttsenginesite_GetActions(ISpTTSEngineSite *iface) return actions; }

+static HRESULT resample_engine_output(struct tts_engine_site *This, const void *buf, ULONG cb, ULONG *cb_written) +{ + MFT_OUTPUT_DATA_BUFFER mft_buf; + IMFMediaBuffer *in_buf = NULL; + IMFSample *in_sample = NULL; + BYTE *in_data, *out_data; + DWORD out_len; + DWORD status; + HRESULT hr; + + if (FAILED(hr = MFCreateSample(&in_sample)) || + FAILED(hr = MFCreateMemoryBuffer(cb, &in_buf)) || + FAILED(hr = IMFSample_AddBuffer(in_sample, in_buf))) + goto done; + + if (!This->out_sample) + { + if (FAILED(hr = MFCreateSample(&This->out_sample)) || + FAILED(hr = MFCreateMemoryBuffer(16384, &This->out_buf)) || + FAILED(hr = IMFSample_AddBuffer(This->out_sample, This->out_buf))) + goto done; + } + + if (FAILED(hr = IMFMediaBuffer_Lock(in_buf, &in_data, NULL, NULL))) + goto done; + memcpy(in_data, buf, cb); + IMFMediaBuffer_Unlock(in_buf); + + IMFMediaBuffer_SetCurrentLength(in_buf, cb); + + if (FAILED(hr = IMFTransform_ProcessInput(This->voice->resampler, 0, in_sample, 0))) + goto done; + + while (SUCCEEDED(hr)) + { + memset(&mft_buf, 0, sizeof(mft_buf)); + mft_buf.pSample = This->out_sample; + + if (FAILED(hr = IMFTransform_ProcessOutput(This->voice->resampler, 0, 1, &mft_buf, &status))) + { + if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) + hr = S_OK; + break; + } + + if (FAILED(hr = IMFMediaBuffer_GetCurrentLength(This->out_buf, &out_len)) || + FAILED(hr = IMFMediaBuffer_Lock(This->out_buf, &out_data, NULL, NULL))) + break; + + hr = ISpStreamFormat_Write(This->voice->output, out_data, out_len, NULL); + IMFMediaBuffer_Unlock(This->out_buf); + } + +done: + if (in_sample) IMFSample_Release(in_sample); + if (in_buf) IMFMediaBuffer_Release(in_buf); + + if (cb_written) + *cb_written = SUCCEEDED(hr) ? 
cb : 0; + + return hr; +} + static HRESULT WINAPI ttsenginesite_Write(ISpTTSEngineSite *iface, const void *buf, ULONG cb, ULONG *cb_written) { struct tts_engine_site *This = impl_from_ISpTTSEngineSite(iface); @@ -1401,6 +1555,9 @@ static HRESULT WINAPI ttsenginesite_Write(ISpTTSEngineSite *iface, const void *b if (!This->voice->output) return SPERR_UNINITIALIZED;

+ if (This->use_resampler) + return resample_engine_output(This, buf, cb, cb_written); + return ISpStreamFormat_Write(This->voice->output, buf, cb, cb_written); }

@@ -1472,6 +1629,9 @@ static HRESULT ttsenginesite_create(struct speech_voice *voice, ULONG stream_num This->ref = 1; This->voice = voice; This->stream_num = stream_num; + This->use_resampler = FALSE; + This->out_sample = NULL; + This->out_buf = NULL;

ISpeechVoice_AddRef(&This->voice->ISpeechVoice_iface);

@@ -1545,6 +1705,7 @@ HRESULT speech_voice_create(IUnknown *outer, REFIID iid, void **obj) This->ref = 1;

This->output = NULL; + This->resampler = NULL; This->engine_token = NULL; This->engine = NULL; This->cur_stream_num = 0;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/8571

Shaun Ren

12:52 a.m.

New subject: [PATCH 4/4] sapi/tts: Support allow_format_changes in ISpVoice::SetOutput.

From: Shaun Ren <sren@codeweavers.com>

--- dlls/sapi/tts.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/dlls/sapi/tts.c b/dlls/sapi/tts.c index 54537d927be..159635a4559 100644 --- a/dlls/sapi/tts.c +++ b/dlls/sapi/tts.c @@ -49,6 +49,7 @@ struct speech_voice

ISpStreamFormat *output; IMFTransform *resampler; + BOOL allow_format_changes; ISpObjectToken *engine_token; ISpTTSEngine *engine; LONG cur_stream_num; @@ -683,9 +684,6 @@ static HRESULT WINAPI spvoice_SetOutput(ISpVoice *iface, IUnknown *unk, BOOL all

TRACE("(%p, %p, %d).\n", iface, unk, allow_format_changes);

- if (!allow_format_changes) - FIXME("ignoring allow_format_changes = FALSE.\n"); - if (FAILED(hr = async_start_queue(&This->queue))) return hr;

@@ -719,6 +717,8 @@ static HRESULT WINAPI spvoice_SetOutput(ISpVoice *iface, IUnknown *unk, BOOL all ISpStreamFormat_Release(This->output); This->output = stream;

+ This->allow_format_changes = allow_format_changes; + LeaveCriticalSection(&This->cs);

return S_OK; @@ -925,7 +925,8 @@ static HRESULT set_output_format(struct speech_voice *voice, GUID *fmtid, WAVEFO if (memcmp(output_wfx, *engine_wfx, sizeof(WAVEFORMATEX)) || memcmp(output_wfx + 1, *engine_wfx + 1, output_wfx->cbSize)) { - if (SUCCEEDED(ISpStreamFormat_QueryInterface(voice->output, &IID_ISpAudio, (void **)&audio))) + if (voice->allow_format_changes && + SUCCEEDED(ISpStreamFormat_QueryInterface(voice->output, &IID_ISpAudio, (void **)&audio))) { hr = ISpAudio_SetFormat(audio, &SPDFID_WaveFormatEx, *engine_wfx); if (hr == SPERR_UNSUPPORTED_FORMAT) @@ -1706,6 +1707,7 @@ HRESULT speech_voice_create(IUnknown *outer, REFIID iid, void **obj)

This->output = NULL; This->resampler = NULL; + This->allow_format_changes = TRUE; This->engine_token = NULL; This->engine = NULL; This->cur_stream_num = 0;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/8571

Huw Davies (＠huw)

16 Jul

7:18 a.m.

New subject: [PATCH 0/4] MR8571: sapi/tts: Implement TTS engine audio output resampler. - approved

This merge request was approved by Huw Davies.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/8571

111

Age (days ago)

112

Last active (days ago)

wine-gitlab@winehq.org

5 comments

3 participants

tags (0)

participants (3)

Huw Davies (＠huw)
Shaun Ren
Shaun Ren (＠shaunren)