From: Shaun Ren sren@codeweavers.com
--- dlls/sapi/tests/tts.c | 26 +++++++------- dlls/sapi/xml.c | 80 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 14 deletions(-)
diff --git a/dlls/sapi/tests/tts.c b/dlls/sapi/tests/tts.c index 569ee918164..86b484b268e 100644 --- a/dlls/sapi/tests/tts.c +++ b/dlls/sapi/tests/tts.c @@ -1052,24 +1052,22 @@ static void test_spvoice_ssml(void) reset_engine_params(&test_engine);
hr = ISpVoice_Speak(voice, text4, SPF_DEFAULT, NULL); - todo_wine ok(hr == S_OK, "got %#lx.\n", hr); - todo_wine ok(test_engine.frag_count == 2, "got %Iu.\n", test_engine.frag_count); + ok(hr == S_OK, "got %#lx.\n", hr); + ok(test_engine.frag_count == 2, "got %Iu.\n", test_engine.frag_count);
- if (test_engine.frag_count == 2) { - check_frag_text(0, L"One, "); - check_frag_state_field(0, eAction, SPVA_Speak, "%d"); - check_frag_state_field(0, RateAdj, 0, "%ld"); + check_frag_text(0, L"One, "); + check_frag_state_field(0, eAction, SPVA_Speak, "%d"); + check_frag_state_field(0, RateAdj, 0, "%ld");
- check_frag_text(1, L"two."); - check_frag_state_field(1, eAction, SPVA_Speak, "%d"); - check_frag_state_field(1, RateAdj, -17, "%ld"); /* 3^(-17/10) ~= 0.15 */ - } + check_frag_text(1, L"two."); + check_frag_state_field(1, eAction, SPVA_Speak, "%d"); + check_frag_state_field(1, RateAdj, -17, "%ld"); /* 3^(-17/10) ~= 0.15 */
reset_engine_params(&test_engine);
hr = ISpVoice_Speak(voice, text5, SPF_IS_XML | SPF_PARSE_SSML, NULL); - todo_wine ok(hr == S_OK, "got %#lx.\n", hr); - todo_wine ok(test_engine.frag_count == 8 || broken(test_engine.frag_count == 3) /* win7 */, + ok(hr == S_OK, "got %#lx.\n", hr); + ok(test_engine.frag_count == 8 || broken(test_engine.frag_count == 3) /* win7 */, "got %Iu.\n", test_engine.frag_count);
if (test_engine.frag_count == 8) { @@ -1086,7 +1084,7 @@ static void test_spvoice_ssml(void) reset_engine_params(&test_engine);
hr = ISpVoice_Speak(voice, text6, SPF_IS_XML | SPF_PARSE_SSML, NULL); - todo_wine ok(hr == S_OK || broken(hr == SPERR_UNSUPPORTED_FORMAT) /* win7 */, "got %#lx.\n", hr); + ok(hr == S_OK || broken(hr == SPERR_UNSUPPORTED_FORMAT) /* win7 */, "got %#lx.\n", hr);
if (hr == S_OK) { ok(test_engine.frag_count == 5, "got %Iu.\n", test_engine.frag_count); @@ -1101,7 +1099,7 @@ static void test_spvoice_ssml(void) reset_engine_params(&test_engine);
hr = ISpVoice_Speak(voice, text7, SPF_IS_XML | SPF_PARSE_SSML, NULL); - todo_wine ok(hr == S_OK || broken(hr == SPERR_UNSUPPORTED_FORMAT) /* win7 */, "got %#lx.\n", hr); + ok(hr == S_OK || broken(hr == SPERR_UNSUPPORTED_FORMAT) /* win7 */, "got %#lx.\n", hr);
if (hr == S_OK) { ok(test_engine.frag_count == 5, "got %Iu.\n", test_engine.frag_count); diff --git a/dlls/sapi/xml.c b/dlls/sapi/xml.c index aad32462272..97305df344c 100644 --- a/dlls/sapi/xml.c +++ b/dlls/sapi/xml.c @@ -25,6 +25,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */
+#include <math.h> #include <assert.h>
#define COBJMACROS @@ -457,6 +458,81 @@ static HRESULT add_sapi_text_fragment(struct xml_parser *parser, const SPVSTATE return S_OK; }
+static HRESULT parse_double_value(const xmlstr_t *value, double *res, size_t *read_len) +{ + WCHAR *buf, *end; + + if (!value->len) + return SPERR_UNSUPPORTED_FORMAT; + + if (!(buf = xmlstrdupW(value))) + return E_OUTOFMEMORY; + + *res = wcstod(buf, &end); + *read_len = end - buf; + + free(buf); + return S_OK; +} + +static HRESULT parse_ssml_elems(struct xml_parser *parser, const SPVSTATE *state, const struct xml_elem *parent); + +static HRESULT parse_ssml_prosody_elem(struct xml_parser *parser, SPVSTATE state, const struct xml_elem *parent) +{ + struct xml_attr attr; + BOOL end = FALSE; + size_t read_len; + HRESULT hr; + + while (next_xml_attr(parser, &attr, &end)) + { + if (xml_attr_eq(&attr, L"rate")) + { + if (xmlstr_eq(&attr.value, L"x-slow")) + state.RateAdj = -9; + else if (xmlstr_eq(&attr.value, L"slow")) + state.RateAdj = -4; + else if (xmlstr_eq(&attr.value, L"medium")) + state.RateAdj = 0; + else if (xmlstr_eq(&attr.value, L"fast")) + state.RateAdj = 4; + else if (xmlstr_eq(&attr.value, L"x-fast")) + state.RateAdj = 9; + else + { + double rate; + + if (FAILED(hr = parse_double_value(&attr.value, &rate, &read_len))) + return hr; + if (read_len < attr.value.len - 1 || + (read_len == attr.value.len - 1 && attr.value.ptr[read_len] != '%')) + { + ERR("Invalid value %s for the rate attribute in <prosody>.\n", debugstr_xmlstr(&attr.value)); + return SPERR_UNSUPPORTED_FORMAT; + } + + if (attr.value.ptr[attr.value.len - 1] == '%') + rate = 1 + rate / 100; + + if (rate < 0) + state.RateAdj = 0; + else if (rate <= 0.01) + state.RateAdj = -10; + else + state.RateAdj = lround(log(rate) * (10 / log(3))); + } + } + else + { + FIXME("Unknown <prosody> attribute %s.\n", debugstr_xmlstr(&attr.name)); + return E_NOTIMPL; + } + } + + if (end) return S_OK; + return parse_ssml_elems(parser, &state, parent); +} + static HRESULT parse_ssml_elems(struct xml_parser *parser, const SPVSTATE *state, const struct xml_elem *parent) { struct xml_attr attr; @@ -476,6 +552,10 @@ static 
HRESULT parse_ssml_elems(struct xml_parser *parser, const SPVSTATE *state if (end) continue; hr = parse_ssml_elems(parser, state, &elem); } + else if (xml_elem_eq(&elem, ssml_ns, L"prosody")) + { + hr = parse_ssml_prosody_elem(parser, *state, &elem); + } else { FIXME("Unknown element %s.\n", debugstr_xmlstr(&elem.name));
From: Shaun Ren sren@codeweavers.com
--- dlls/sapi/tests/tts.c | 4 ++-- dlls/sapi/xml.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/dlls/sapi/tests/tts.c b/dlls/sapi/tests/tts.c index 86b484b268e..4f75aa88cac 100644 --- a/dlls/sapi/tests/tts.c +++ b/dlls/sapi/tests/tts.c @@ -1123,7 +1123,7 @@ static void test_spvoice_ssml(void) reset_engine_params(&test_engine);
hr = ISpVoice_Speak(voice, text8, SPF_IS_XML | SPF_PARSE_SSML, NULL); - todo_wine ok(hr == S_OK, "got %#lx.\n", hr); + ok(hr == S_OK, "got %#lx.\n", hr);
if (hr == S_OK) { ok(test_engine.frag_count == 9, "got %Iu.\n", test_engine.frag_count); @@ -1147,7 +1147,7 @@ static void test_spvoice_ssml(void) reset_engine_params(&test_engine);
hr = ISpVoice_Speak(voice, text9, SPF_IS_XML | SPF_PARSE_SSML, NULL); - todo_wine ok(hr == S_OK || broken(hr == SPERR_UNSUPPORTED_FORMAT) /* win7 */, "got %#lx.\n", hr); + ok(hr == S_OK || broken(hr == SPERR_UNSUPPORTED_FORMAT) /* win7 */, "got %#lx.\n", hr);
if (hr == S_OK) { ok(test_engine.frag_count == 7, "got %Iu.\n", test_engine.frag_count); diff --git a/dlls/sapi/xml.c b/dlls/sapi/xml.c index 97305df344c..1446b045de8 100644 --- a/dlls/sapi/xml.c +++ b/dlls/sapi/xml.c @@ -475,6 +475,13 @@ static HRESULT parse_double_value(const xmlstr_t *value, double *res, size_t *re return S_OK; }
+static inline long lclamp(long value, long value_min, long value_max) +{ + if (value < value_min) return value_min; + if (value > value_max) return value_max; + return value; +} + static HRESULT parse_ssml_elems(struct xml_parser *parser, const SPVSTATE *state, const struct xml_elem *parent);
static HRESULT parse_ssml_prosody_elem(struct xml_parser *parser, SPVSTATE state, const struct xml_elem *parent) @@ -522,6 +529,41 @@ static HRESULT parse_ssml_prosody_elem(struct xml_parser *parser, SPVSTATE state state.RateAdj = lround(log(rate) * (10 / log(3))); } } + else if (xml_attr_eq(&attr, L"volume")) + { + if (xmlstr_eq(&attr.value, L"silent")) + state.Volume = 0; + else if (xmlstr_eq(&attr.value, L"x-soft")) + state.Volume = 20; + else if (xmlstr_eq(&attr.value, L"soft")) + state.Volume = 40; + else if (xmlstr_eq(&attr.value, L"medium")) + state.Volume = 60; + else if (xmlstr_eq(&attr.value, L"loud")) + state.Volume = 80; + else if (xmlstr_eq(&attr.value, L"x-loud")) + state.Volume = 100; + else + { + double volume; + + if (FAILED(hr = parse_double_value(&attr.value, &volume, &read_len))) + return hr; + if (read_len < attr.value.len - 1 || + (read_len == attr.value.len - 1 && attr.value.ptr[read_len] != '%')) + { + ERR("Invalid value %s for the volume attribute in <prosody>.\n", debugstr_xmlstr(&attr.value)); + return SPERR_UNSUPPORTED_FORMAT; + } + + if (attr.value.ptr[attr.value.len - 1] == '%') + volume = state.Volume * (1 + volume / 100); + else if (attr.value.ptr[0] == '+' || attr.value.ptr[0] == '-') + volume = state.Volume + volume; + + state.Volume = lclamp(lround(volume), 0, 100); + } + } else { FIXME("Unknown <prosody> attribute %s.\n", debugstr_xmlstr(&attr.name));
From: Shaun Ren sren@codeweavers.com
--- dlls/sapi/tests/tts.c | 53 +++++++++++++++++++++++++++++++++++++++++++ dlls/sapi/xml.c | 37 ++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+)
diff --git a/dlls/sapi/tests/tts.c b/dlls/sapi/tests/tts.c index 4f75aa88cac..43f8f93de1f 100644 --- a/dlls/sapi/tests/tts.c +++ b/dlls/sapi/tests/tts.c @@ -922,6 +922,28 @@ static void test_spvoice_ssml(void) L"<prosody volume='loud'><prosody volume='soft'>soft.</prosody></prosody>" L"</speak>";
+ static const WCHAR text10[] = + L"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-us'>" + L"<prosody pitch='300Hz'>300Hz.</prosody>" + L"<prosody pitch='600Hz'>600Hz.</prosody>" + L"<prosody pitch='+300Hz'>+300Hz.</prosody>" + L"<prosody pitch='-300Hz'>-300Hz.</prosody>" + L"<prosody pitch='41.4%'>41.4%.</prosody>" + L"<prosody pitch='+41.4%'>+41.4%.</prosody>" + L"<prosody pitch='-50%'>-50%.</prosody>" + L"<prosody pitch='-98.99999%'>-98.99999%.</prosody>" + L"<prosody pitch='-99%'>-99%.</prosody>" + L"<prosody pitch='-101%'>-101%.</prosody>" + L"<prosody pitch='41.4%'><prosody pitch='+41.4%'>+100%.</prosody></prosody>" + L"<prosody pitch='41.4%'><prosody pitch='-50%'>-29%.</prosody></prosody>" + L"<prosody pitch='x-low'>x-low.</prosody>" + L"<prosody pitch='low'>low.</prosody>" + L"<prosody pitch='medium'>medium.</prosody>" + L"<prosody pitch='high'>high.</prosody>" + L"<prosody pitch='x-high'>x-high.</prosody>" + L"<prosody pitch='high'><prosody pitch='low'>high-low.</prosody></prosody>" + L"</speak>"; +
ISpVoice *voice; ISpObjectToken *token; @@ -1162,6 +1184,37 @@ static void test_spvoice_ssml(void) check_frag_state_field(6, Volume, 40, "%lu"); /* soft */ }
+ reset_engine_params(&test_engine); + + hr = ISpVoice_Speak(voice, text10, SPF_IS_XML | SPF_PARSE_SSML, NULL); + ok(hr == S_OK || broken(hr == SPERR_UNSUPPORTED_FORMAT) /* win7 */, "got %#lx.\n", hr); + + if (hr == S_OK) { + ok(test_engine.frag_count == 18, "got %Iu.\n", test_engine.frag_count); + + check_frag_state_field(0, PitchAdj.MiddleAdj, 0, "%ld"); /* Absolute Hz values are ignored. */ + check_frag_state_field(1, PitchAdj.MiddleAdj, 0, "%ld"); + check_frag_state_field(2, PitchAdj.MiddleAdj, 0, "%ld"); + check_frag_state_field(3, PitchAdj.MiddleAdj, 0, "%ld"); + check_frag_state_field(4, PitchAdj.MiddleAdj, 12, "%ld"); /* 2^(12/24) ~= 1.414. */ + check_frag_state_field(5, PitchAdj.MiddleAdj, 12, "%ld"); /* 2^(12/24) ~= 1.414. */ + check_frag_state_field(6, PitchAdj.MiddleAdj, -24, "%ld"); /* 2^(-24/24) = 0.5. */ + check_frag_state_field(7, PitchAdj.MiddleAdj, -159, "%ld"); /* 2^(-159/24) ~= 0.0100001. */ + check_frag_state_field(8, PitchAdj.MiddleAdj, -10, "%ld"); /* -99%. */ + check_frag_state_field(9, PitchAdj.MiddleAdj, -10, "%ld"); /* -101%. */ + check_frag_state_field(10, PitchAdj.MiddleAdj, 24, "%ld"); /* 2^(24/24) = 1. */ + check_frag_state_field(11, PitchAdj.MiddleAdj, -12, "%ld"); /* 2^(-12/24) ~= 0.707. 
*/ + + check_frag_state_field(12, PitchAdj.MiddleAdj, -9, "%ld"); /* x-low */ + check_frag_state_field(13, PitchAdj.MiddleAdj, -4, "%ld"); /* low */ + check_frag_state_field(14, PitchAdj.MiddleAdj, 0, "%ld"); /* medium */ + check_frag_state_field(15, PitchAdj.MiddleAdj, 4, "%ld"); /* high */ + check_frag_state_field(16, PitchAdj.MiddleAdj, 9, "%ld"); /* x-high */ + + check_frag_state_field(17, PitchAdj.MiddleAdj, -4, "%ld"); /* low */ + } + + reset_engine_params(&test_engine); ISpVoice_Release(voice); ISpObjectToken_Release(token); diff --git a/dlls/sapi/xml.c b/dlls/sapi/xml.c index 1446b045de8..0f2ca8bc3cc 100644 --- a/dlls/sapi/xml.c +++ b/dlls/sapi/xml.c @@ -564,6 +564,43 @@ static HRESULT parse_ssml_prosody_elem(struct xml_parser *parser, SPVSTATE state state.Volume = lclamp(lround(volume), 0, 100); } } + else if (xml_attr_eq(&attr, L"pitch")) + { + if (xmlstr_eq(&attr.value, L"x-low")) + state.PitchAdj.MiddleAdj = -9; + else if (xmlstr_eq(&attr.value, L"low")) + state.PitchAdj.MiddleAdj = -4; + else if (xmlstr_eq(&attr.value, L"medium")) + state.PitchAdj.MiddleAdj = 0; + else if (xmlstr_eq(&attr.value, L"high")) + state.PitchAdj.MiddleAdj = 4; + else if (xmlstr_eq(&attr.value, L"x-high")) + state.PitchAdj.MiddleAdj = 9; + else + { + double pitch; + + if (FAILED(hr = parse_double_value(&attr.value, &pitch, &read_len))) + return hr; + + if (attr.value.len > 2 && read_len == attr.value.len - 2 && + attr.value.ptr[read_len] == 'H' && attr.value.ptr[read_len + 1] == 'z') + { + WARN("Ignoring Hz pitch value %s in <prosody>.\n", debugstr_xmlstr(&attr.value)); + continue; + } + else if (read_len != attr.value.len - 1 || attr.value.ptr[read_len] != '%') + { + ERR("Invalid value %s for the pitch attribute in <prosody>.\n", debugstr_xmlstr(&attr.value)); + return SPERR_UNSUPPORTED_FORMAT; + } + + if (pitch > -99) + state.PitchAdj.MiddleAdj += lround(log2(1 + pitch / 100) * 24); + else + state.PitchAdj.MiddleAdj -= 10; + } + } else { FIXME("Unknown <prosody> attribute 
%s.\n", debugstr_xmlstr(&attr.name));
Huw Davies (@huw) commented about dlls/sapi/xml.c:
- HRESULT hr;
- while (next_xml_attr(parser, &attr, &end))
- {
if (xml_attr_eq(&attr, L"rate"))
{
if (xmlstr_eq(&attr.value, L"x-slow"))
state.RateAdj = -9;
else if (xmlstr_eq(&attr.value, L"slow"))
state.RateAdj = -4;
else if (xmlstr_eq(&attr.value, L"medium"))
state.RateAdj = 0;
else if (xmlstr_eq(&attr.value, L"fast"))
state.RateAdj = 4;
else if (xmlstr_eq(&attr.value, L"x-fast"))
state.RateAdj = 9;
This chain of keyword comparisons would likely be cleaner using a lookup table (likewise for the `volume` and `pitch` handling added by the other commits in this MR).
Also note that the final commit message in this MR is missing the usual `sapi:` prefix.