[PATCH 0/3] MR10646: msxml3: Add support of GB2312.
Superseded MR !3928. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10646
From: Jactry Zeng <jzeng@codeweavers.com> --- dlls/msxml3/mxwriter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dlls/msxml3/mxwriter.c b/dlls/msxml3/mxwriter.c index 6b1cd4706cb..85822b526d8 100644 --- a/dlls/msxml3/mxwriter.c +++ b/dlls/msxml3/mxwriter.c @@ -58,6 +58,7 @@ typedef enum XmlEncoding_ISO_8859_9, XmlEncoding_UTF16, XmlEncoding_UTF8, + XmlEncoding_windows_936, XmlEncoding_windows_1250, XmlEncoding_windows_1251, XmlEncoding_windows_1252, @@ -89,6 +90,8 @@ static const struct xml_encoding_data xml_encoding_map[] = { { L"iso-8859-9", XmlEncoding_ISO_8859_9, 28599 }, { L"UTF-16", XmlEncoding_UTF16, ~0 }, { L"UTF-8", XmlEncoding_UTF8, CP_UTF8 }, + { L"GBK", XmlEncoding_windows_936, 936 }, + { L"GB2312", XmlEncoding_windows_936, 936 }, { L"windows-1250", XmlEncoding_windows_1250, 1250 }, { L"windows-1251", XmlEncoding_windows_1251, 1251 }, { L"windows-1252", XmlEncoding_windows_1252, 1252 }, -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10646
From: Jactry Zeng <jzeng@codeweavers.com> --- dlls/msxml3/main.c | 1 + dlls/msxml3/tests/domdoc.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/dlls/msxml3/main.c b/dlls/msxml3/main.c index 16e0b38b185..b0711c510f6 100644 --- a/dlls/msxml3/main.c +++ b/dlls/msxml3/main.c @@ -321,6 +321,7 @@ static void init_char_encoders(void) xmlCharEncodingOutputFunc output; } encoder[] = { + { "gb2312", gbk_to_utf8, utf8_to_gbk }, { "gbk", gbk_to_utf8, utf8_to_gbk }, { "iso8859-1", iso8859_1_to_utf8, utf8_to_iso8859_1 }, { "windows-1250", win1250_to_utf8, utf8_to_win1250 }, diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index f9dfef94b81..243c84dcf57 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -805,6 +805,13 @@ static const char win936xml[] = DECL_WIN_936 "<open></open>"; +#define DECL_WIN_GB2312 \ +"<?xml version=\"1.0\" encoding=\"GB2312\"?>" + +static const char gb2312xml[] = +DECL_WIN_GB2312 +"<open></open>"; + #define DECL_WIN_1252 \ "<?xml version=\"1.0\" encoding=\"Windows-1252\"?>" @@ -11077,6 +11084,7 @@ static void test_load(void) { iso8859_1_xml, S_OK, VARIANT_TRUE }, { win1252xml, S_OK, VARIANT_TRUE }, { win936xml, S_FALSE, VARIANT_FALSE }, + { gb2312xml, S_OK, VARIANT_TRUE }, }; -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10646
From: Jactry Zeng <jzeng@codeweavers.com> --- dlls/msxml3/tests/domdoc.c | 83 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 243c84dcf57..7efbf8bb5d2 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -16778,6 +16778,88 @@ static void test_createElement(void) free_bstrs(); } +static void test_cjk_codepages(void) +{ + IXMLDOMNodeList *node_list; + CHAR *content, *buffer; + IXMLDOMDocument *doc; + IXMLDOMNode *node; + WCHAR locale[8]; + HANDLE file; + int i, size; + DWORD read; + HRESULT hr; + BSTR str; + const struct codepage_test { + const CHAR *cp; + BOOL supported; + } zhcn_tests[] = { + {"GB2312", TRUE}, + {"GBK", TRUE}, + {"Windows-936", FALSE}, + }; + + GetSystemDefaultLocaleName(locale, sizeof(locale)); + if (!wcscmp(locale, L"ja-JP") || !wcscmp(locale, L"en-AE")) + { + win_skip("Skipping tests on Japanese and English (AE) Windows.\n" ); + return; + } + + for (i = 0; i < ARRAYSIZE(zhcn_tests); i++) + { + static const CHAR xml_zhcn[] = "<?xml version=\"1.0\" encoding=\"%s\"?>\r\n<euro>\x80</euro>\r\n"; + doc = create_document(&IID_IXMLDOMDocument); + + size = strlen(xml_zhcn) + strlen(zhcn_tests[i].cp) - 1; + content = malloc(size); + + wsprintfA(content, xml_zhcn, zhcn_tests[i].cp); + + hr = IXMLDOMDocument_loadXML(doc, _bstr_(content), NULL); + ok(hr == S_OK, "tests[%d]: Got hr %#lx.\n", i, hr); + + hr = IXMLDOMDocument_save(doc, _variantbstr_("test.xml")); + if (zhcn_tests[i].supported) + ok(hr == S_OK, "tests[%d]: Got hr %#lx.\n", i, hr); + else + { + ok(hr == E_FAIL, "tests[%d]: Got hr %#lx.\n", i, hr); + free(content); + IXMLDOMDocument_Release(doc); + continue; + } + + file = CreateFileA("test.xml", GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + ok(file != INVALID_HANDLE_VALUE, "tests[%d]: Could not open file: %#lx.\n", i, GetLastError()); + read = 0; + buffer = malloc(size); + memset(buffer, 0, size); + 
ReadFile(file, buffer, size, &read, NULL); + ok(!!read, "tests[%d]: Could not read file.\n", i); + ok(!memcmp(buffer, content, size), "tests[%d]: Got wrong content.\n", i); + free(buffer); + CloseHandle(file); + DeleteFileA("test.xml"); + + hr = IXMLDOMDocument_getElementsByTagName(doc, _bstr_("euro"), &node_list); + ok(hr == S_OK, "tests[%d]: Got hr %#lx.\n", i, hr); + hr = IXMLDOMNodeList_get_item(node_list, 0, &node); + ok(hr == S_OK, "tests[%d]: Got hr %#lx.\n", i, hr); + hr = IXMLDOMNode_get_text(node, &str); + ok(hr == S_OK, "tests[%d]: Got hr %#lx.\n", i, hr); + ok(!wcscmp(str, L"\x20ac"), "tests[%d]: Got unexpected text %s.\n", i, debugstr_w(str)); + SysFreeString(str); + IXMLDOMNode_Release(node); + IXMLDOMNodeList_Release(node_list); + + free(content); + IXMLDOMDocument_Release(doc); + } + + free_bstrs(); +} + START_TEST(domdoc) { HRESULT hr; @@ -16882,6 +16964,7 @@ START_TEST(domdoc) test_document_reload(); test_setAttribute(); test_createElement(); + test_cjk_codepages(); if (is_clsid_supported(&CLSID_MXNamespaceManager40, &IID_IMXNamespaceManager)) { -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10646
Nikolay Sivov (@nsivov) commented about dlls/msxml3/tests/domdoc.c:
+ const struct codepage_test { + const CHAR *cp; + BOOL supported; + } zhcn_tests[] = { + {"GB2312", TRUE}, + {"GBK", TRUE}, + {"Windows-936", FALSE}, + }; + + GetSystemDefaultLocaleName(locale, sizeof(locale)); + if (!wcscmp(locale, L"ja-JP") || !wcscmp(locale, L"en-AE")) + { + win_skip("Skipping tests on Japanese and English (AE) Windows.\n" ); + return; + } + Why would system locale matter?
-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10646#note_135979
Nikolay Sivov (@nsivov) commented about dlls/msxml3/tests/domdoc.c:
+ return; + } + + for (i = 0; i < ARRAYSIZE(zhcn_tests); i++) + { + static const CHAR xml_zhcn[] = "<?xml version=\"1.0\" encoding=\"%s\"?>\r\n<euro>\x80</euro>\r\n"; + doc = create_document(&IID_IXMLDOMDocument); + + size = strlen(xml_zhcn) + strlen(zhcn_tests[i].cp) - 1; + content = malloc(size); + + wsprintfA(content, xml_zhcn, zhcn_tests[i].cp); + + hr = IXMLDOMDocument_loadXML(doc, _bstr_(content), NULL); + ok(hr == S_OK, "tests[%d]: Got hr %#lx.\n", i, hr); + loadXML() expects WCHARs, and will ignore encoding attribute. So this test is not testing encoding I believe.
-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10646#note_135980
Nikolay Sivov (@nsivov) commented about dlls/msxml3/tests/domdoc.c:
+ continue; + } + + file = CreateFileA("test.xml", GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + ok(file != INVALID_HANDLE_VALUE, "tests[%d]: Could not open file: %#lx.\n", i, GetLastError()); + read = 0; + buffer = malloc(size); + memset(buffer, 0, size); + ReadFile(file, buffer, size, &read, NULL); + ok(!!read, "tests[%d]: Could not read file.\n", i); + ok(!memcmp(buffer, content, size), "tests[%d]: Got wrong content.\n", i); + free(buffer); + CloseHandle(file); + DeleteFileA("test.xml"); + + hr = IXMLDOMDocument_getElementsByTagName(doc, _bstr_("euro"), &node_list); In this case it's easier to do get_documentElement().
-- https://gitlab.winehq.org/wine/wine/-/merge_requests/10646#note_135981
Nikolay Sivov (@nsivov) commented about dlls/msxml3/mxwriter.c:
{ L"iso-8859-9", XmlEncoding_ISO_8859_9, 28599 }, { L"UTF-16", XmlEncoding_UTF16, ~0 }, { L"UTF-8", XmlEncoding_UTF8, CP_UTF8 }, + { L"GBK", XmlEncoding_windows_936, 936 }, + { L"GB2312", XmlEncoding_windows_936, 936 }, { L"windows-1250", XmlEncoding_windows_1250, 1250 },
The writer will need separate tests, if we really need it. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10646#note_135982
Nikolay Sivov (@nsivov) commented about dlls/msxml3/main.c:
xmlCharEncodingOutputFunc output; } encoder[] = { + { "gb2312", gbk_to_utf8, utf8_to_gbk },
This is going to be used in schemas only; the added test should already parse without any changes. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10646#note_135983
Hi Nikolay,
Nikolay Sivov started a new discussion on dlls/msxml3/tests/domdoc.c:https://gitlab.winehq.org/wine/wine/-/merge_requests/10646#note_135979
+ return; + } +
Why would system locale matter?
Neither GBK nor GB2312 is supported on w10pro64_en_AE_u8 and w10pro64_ja; at least the writer doesn't support them: https://testbot.winehq.org/JobDetails.pl?Key=162522
Nikolay Sivov started a new discussion on dlls/msxml3/tests/domdoc.c:https://gitlab.winehq.org/wine/wine/-/merge_requests/10646#note_135980
+ hr = IXMLDOMDocument_loadXML(doc,_bstr_(content), NULL); + ok(hr == S_OK, "tests[%d]: Got hr %#lx.\n", i, hr); +
loadXML() expects WCHARs, and will ignore encoding attribute. So this test is not testing encoding I believe.
Yes, the tests in the third patch are mainly for testing the writer, and they try to prove that GBK and GB2312 are treated as Windows codepage 936, which defines a character for 0x80. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10646#note_136018
participants (3)
-
Jactry Zeng -
Jactry Zeng (@jactry) -
Nikolay Sivov (@nsivov)