[PATCH 0/3] MR10720: msxml3: Support for Shift_JIS encoding in parser.
Signed-off-by: Nikolay Sivov <nsivov@codeweavers.com> -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10720
From: Nikolay Sivov <nsivov@codeweavers.com> Signed-off-by: Nikolay Sivov <nsivov@codeweavers.com> --- dlls/msxml3/tests/domdoc.c | 47 +++++++++++++++++++++++++++++++++++ dlls/msxml3/tests/saxreader.c | 33 ++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 55e5fd5f9d5..4682565ae0b 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -820,6 +820,12 @@ static const char win1252decl[] = DECL_WIN_1252 ; +static const char shift_jis_xml[] = + "<?xml version=\"1.0\" encoding=\"shift_jis\" ?><a>" "\x83\x89" "</a>"; + +static const char shift_jis_xml2[] = + "<?xml version=\"1.0\" encoding=\"shift-jis\" ?><a>" "\x83\x89" "</a>"; + static const char nocontent[] = "no xml content here"; static const char szExampleXML[] = @@ -11532,6 +11538,47 @@ static void test_load(void) IXMLDOMDocument_Release(doc); } + /* Shift_JIS */ + GetTempPathA(MAX_PATH, path); + strcat(path, "shift_jis.xml"); + write_to_file(path, shift_jis_xml); + doc = create_document(&IID_IXMLDOMDocument); + + V_VT(&src) = VT_BSTR; + V_BSTR(&src) = _bstr_(path); + hr = IXMLDOMDocument_load(doc, src, &b); + todo_wine + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); +if (hr == S_OK) +{ + hr = IXMLDOMDocument_get_text(doc, &bstr1); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!wcscmp(bstr1, L"\u30e9"), "Unexpected text %s.\n", debugstr_w(bstr1)); + SysFreeString(bstr1); +} + DeleteFileA(path); + IXMLDOMDocument_Release(doc); + + GetTempPathA(MAX_PATH, path); + strcat(path, "shift_jis.xml"); + write_to_file(path, shift_jis_xml2); + + doc = create_document(&IID_IXMLDOMDocument); + V_VT(&src) = VT_BSTR; + V_BSTR(&src) = _bstr_(path); + hr = IXMLDOMDocument_load(doc, src, &b); + todo_wine + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); +if (hr == S_OK) +{ + hr = IXMLDOMDocument_get_text(doc, &bstr1); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!wcscmp(bstr1, L"\u30e9"), "Unexpected text %s.\n", 
debugstr_w(bstr1)); + SysFreeString(bstr1); +} + DeleteFileA(path); + IXMLDOMDocument_Release(doc); + free_bstrs(); } diff --git a/dlls/msxml3/tests/saxreader.c b/dlls/msxml3/tests/saxreader.c index b7a79bb8faa..918d62fe3c3 100644 --- a/dlls/msxml3/tests/saxreader.c +++ b/dlls/msxml3/tests/saxreader.c @@ -3378,6 +3378,17 @@ static struct call_entry xml_us_ascii_seq[] = { CH_ENDTEST } }; +static struct call_entry xml_shift_jis_seq[] = +{ + { CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK }, + { CH_STARTDOCUMENT, 0, 0, S_OK }, + { CH_STARTELEMENT, 1, 47, S_OK, L"", L"a", L"a" }, + { CH_CHARACTERS, 1, 47, S_OK, L"\u30e9" }, + { CH_ENDELEMENT, 1, 50, S_OK, L"", L"a", L"a" }, + { CH_ENDDOCUMENT, 0, 0, S_OK }, + { CH_ENDTEST } +}; + static void test_saxreader_encoding(void) { static const DWORD ucs4_le_test[] = @@ -3400,6 +3411,12 @@ static void test_saxreader_encoding(void) static const char xml_iso_8859_1_test[] = "<?xml version=\"1.0\" encoding=\"iso-8859-1\" ?><a>" "\x80" "</a>"; + static const char xml_shift_jis_test[] = + "<?xml version=\"1.0\" encoding=\"shift_jis\" ?><a>" "\x83\x89" "</a>"; + + static const char xml_shift_jis_test2[] = + "<?xml version=\"1.0\" encoding=\"shift-jis\" ?><a>" "\x83\x89" "</a>"; + const struct enc_test_entry_t *entry = encoding_test_data; static const CHAR testXmlA[] = "test.xml"; DWORD ucs4_be_test[ARRAYSIZE(ucs4_le_test)]; @@ -3493,6 +3510,22 @@ static void test_saxreader_encoding(void) ok_sequence(sequences, CONTENT_HANDLER_INDEX, xml_iso_8859_1_seq, "Content test with iso-8859-1", FALSE); DeleteFileA(testXmlA); + create_test_file(testXmlA, xml_shift_jis_test, sizeof(xml_shift_jis_test) - 1); + set_expected_seq(xml_shift_jis_seq); + hr = ISAXXMLReader_parseURL(reader, L"test.xml"); + todo_wine + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok_sequence(sequences, CONTENT_HANDLER_INDEX, xml_shift_jis_seq, "Content test with shift_jis", TRUE); + DeleteFileA(testXmlA); + + create_test_file(testXmlA, xml_shift_jis_test2, 
sizeof(xml_shift_jis_test2) - 1); + set_expected_seq(xml_shift_jis_seq); + hr = ISAXXMLReader_parseURL(reader, L"test.xml"); + todo_wine + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok_sequence(sequences, CONTENT_HANDLER_INDEX, xml_shift_jis_seq, "Content test with shift-jis", TRUE); + DeleteFileA(testXmlA); + ISAXXMLReader_Release(reader); } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10720
From: Nikolay Sivov <nsivov@codeweavers.com> Signed-off-by: Nikolay Sivov <nsivov@codeweavers.com> --- dlls/msxml3/saxreader.c | 2 ++ dlls/msxml3/tests/domdoc.c | 8 -------- dlls/msxml3/tests/saxreader.c | 6 ++---- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c index a4aa5728409..0e5548b07de 100644 --- a/dlls/msxml3/saxreader.c +++ b/dlls/msxml3/saxreader.c @@ -587,6 +587,8 @@ static bool saxreader_get_encoding_codepage(const WCHAR *name, UINT *codepage) } encodings[] = { + { L"shift_jis", 932 }, + { L"shift-jis", 932 }, { L"gbk", 936 }, { L"gb2312", 936 }, { L"us-ascii", 20127 }, diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 4682565ae0b..1d7e9456561 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -11547,15 +11547,11 @@ static void test_load(void) V_VT(&src) = VT_BSTR; V_BSTR(&src) = _bstr_(path); hr = IXMLDOMDocument_load(doc, src, &b); - todo_wine ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); -if (hr == S_OK) -{ hr = IXMLDOMDocument_get_text(doc, &bstr1); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); ok(!wcscmp(bstr1, L"\u30e9"), "Unexpected text %s.\n", debugstr_w(bstr1)); SysFreeString(bstr1); -} DeleteFileA(path); IXMLDOMDocument_Release(doc); @@ -11567,15 +11563,11 @@ if (hr == S_OK) V_VT(&src) = VT_BSTR; V_BSTR(&src) = _bstr_(path); hr = IXMLDOMDocument_load(doc, src, &b); - todo_wine ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); -if (hr == S_OK) -{ hr = IXMLDOMDocument_get_text(doc, &bstr1); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); ok(!wcscmp(bstr1, L"\u30e9"), "Unexpected text %s.\n", debugstr_w(bstr1)); SysFreeString(bstr1); -} DeleteFileA(path); IXMLDOMDocument_Release(doc); diff --git a/dlls/msxml3/tests/saxreader.c b/dlls/msxml3/tests/saxreader.c index 918d62fe3c3..55273818fc8 100644 --- a/dlls/msxml3/tests/saxreader.c +++ b/dlls/msxml3/tests/saxreader.c @@ -3513,17 +3513,15 @@ static void test_saxreader_encoding(void) 
create_test_file(testXmlA, xml_shift_jis_test, sizeof(xml_shift_jis_test) - 1); set_expected_seq(xml_shift_jis_seq); hr = ISAXXMLReader_parseURL(reader, L"test.xml"); - todo_wine ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); - ok_sequence(sequences, CONTENT_HANDLER_INDEX, xml_shift_jis_seq, "Content test with shift_jis", TRUE); + ok_sequence(sequences, CONTENT_HANDLER_INDEX, xml_shift_jis_seq, "Content test with shift_jis", FALSE); DeleteFileA(testXmlA); create_test_file(testXmlA, xml_shift_jis_test2, sizeof(xml_shift_jis_test2) - 1); set_expected_seq(xml_shift_jis_seq); hr = ISAXXMLReader_parseURL(reader, L"test.xml"); - todo_wine ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); - ok_sequence(sequences, CONTENT_HANDLER_INDEX, xml_shift_jis_seq, "Content test with shift-jis", TRUE); + ok_sequence(sequences, CONTENT_HANDLER_INDEX, xml_shift_jis_seq, "Content test with shift-jis", FALSE); DeleteFileA(testXmlA); ISAXXMLReader_Release(reader); -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10720
From: Nikolay Sivov <nsivov@codeweavers.com> Signed-off-by: Nikolay Sivov <nsivov@codeweavers.com> --- dlls/msxml3/saxreader.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c index 0e5548b07de..bfb84b9cea4 100644 --- a/dlls/msxml3/saxreader.c +++ b/dlls/msxml3/saxreader.c @@ -814,18 +814,36 @@ struct input_buffer struct list entities; }; +static bool is_mb_codepage(UINT codepage) +{ + return codepage == 932 || codepage == 936; +} + static size_t convert_get_raw_length(struct input_buffer *buffer) { const struct encoded_buffer *raw = &buffer->raw; size_t size = raw->written; - if (buffer->encoding == XML_ENCODING_UTF8 && buffer->code_page == CP_UTF8) + if (buffer->encoding == XML_ENCODING_UTF8) { - /* Incomplete single byte char, look for a start byte of multibyte char. */ - if (raw->data[size-1] & 0x80) + if (buffer->code_page == CP_UTF8) { - while (--size && !(raw->data[size] & 0xc0)) - ; + /* Incomplete single byte char, look for a start byte of multibyte char. */ + if (raw->data[size-1] & 0x80) + { + while (--size && !(raw->data[size] & 0xc0)) + ; + } + } + else if (is_mb_codepage(buffer->code_page)) + { + /* Attempt to skip incomplete character */ + if (size > 2 + && IsDBCSLeadByteEx(buffer->code_page, raw->data[size-1]) + && !IsDBCSLeadByteEx(buffer->code_page, raw->data[size-2])) + { + --size; + } } } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10720
Alfred Agrell (@Alcaro) commented about dlls/msxml3/tests/domdoc.c:
+ V_VT(&src) = VT_BSTR; + V_BSTR(&src) = _bstr_(path); + hr = IXMLDOMDocument_load(doc, src, &b); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMDocument_get_text(doc, &bstr1); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!wcscmp(bstr1, L"\u30e9"), "Unexpected text %s.\n", debugstr_w(bstr1)); + SysFreeString(bstr1); + DeleteFileA(path); + IXMLDOMDocument_Release(doc); + + GetTempPathA(MAX_PATH, path); + strcat(path, "shift_jis.xml"); + write_to_file(path, shift_jis_xml2); + + doc = create_document(&IID_IXMLDOMDocument);

Inconsistent whitespace compared to the above copy. (Your choice which one's correct.)
-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10720#note_137356
On Wed Apr 22 18:28:52 2026 +0000, Alfred Agrell wrote:
> Inconsistent whitespace compared to the above copy (Your choice which one's correct)

I'd rather not re-run the pipeline because of that.
-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10720#note_137364
participants (3):
- Alfred Agrell (@Alcaro)
- Nikolay Sivov
- Nikolay Sivov (@nsivov)