[PATCH 0/1] MR11015: msxml3: Fallback to UTF-8 encoding if detection fails.
Fixes an application that tries to parse an XML file with leading white-space characters. Because native accepts an arbitrary number of white-space characters preceding the input and reports E_SAX_INVALIDENCODING only in the case of a wrong encoding declaration, I have changed the default to UTF-8. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/11015
From: Piotr Caban <piotr@codeweavers.com> --- dlls/msxml3/saxreader.c | 6 +----- dlls/msxml3/tests/saxreader.c | 9 +++++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c index a759d39a417..366f779b063 100644 --- a/dlls/msxml3/saxreader.c +++ b/dlls/msxml3/saxreader.c @@ -5879,10 +5879,6 @@ static enum xmlencoding saxreader_match_encoding(const char *data, size_t size, return XML_ENCODING_UTF16LE; if (b[0] == 0 && b[1] == '<' && b[2] == 0 && b[3] == '?') return XML_ENCODING_UTF16BE; - if (b[0] == '<' && b[1] == '?' && b[2] == 'x' && b[3] == 'm') - return XML_ENCODING_UTF8; - if (b[0] == '<' && b[1] && b[1] != '?') - return XML_ENCODING_UTF8; if (b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf) { @@ -5902,7 +5898,7 @@ static enum xmlencoding saxreader_match_encoding(const char *data, size_t size, return XML_ENCODING_UTF16LE; } - return XML_ENCODING_UNKNOWN; + return XML_ENCODING_UTF8; } static void saxreader_detect_encoding(struct saxlocator *locator, bool force_utf16) diff --git a/dlls/msxml3/tests/saxreader.c b/dlls/msxml3/tests/saxreader.c index 8a3d87401cb..7264229da00 100644 --- a/dlls/msxml3/tests/saxreader.c +++ b/dlls/msxml3/tests/saxreader.c @@ -3417,6 +3417,9 @@ static void test_saxreader_encoding(void) static const char xml_shift_jis_test2[] = "<?xml version=\"1.0\" encoding=\"shift-jis\" ?><a>" "\x83\x89" "</a>"; + static const char utf8_ws_test[] = + " \r\n <a>text</a>"; + const struct enc_test_entry_t *entry = encoding_test_data; static const CHAR testXmlA[] = "test.xml"; DWORD ucs4_be_test[ARRAYSIZE(ucs4_le_test)]; @@ -3464,6 +3467,12 @@ static void test_saxreader_encoding(void) hr = ISAXXMLReader_parse(reader, input); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + /* UTF-8 with leading white-spaces */ + create_test_file(testXmlA, utf8_ws_test, sizeof(utf8_ws_test)); + hr = ISAXXMLReader_parseURL(reader, L"test.xml"); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + DeleteFileA(testXmlA); 
+ ISAXXMLReader_Release(reader); free_bstrs(); -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/11015
Marking as draft because of failing tests. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/11015#note_141524
participants (2)
-
Piotr Caban -
Piotr Caban (@piotr)