[PATCH 0/3] MR10225: Draft: libs/xml2: Tolerate embedded XML declarations inside elements.
Windows MSXML tolerates nested <?xml?> processing instructions inside elements, but libxml2 rejects them with XML_ERR_RESERVED_XML_NAME. When an embedded <?xml is encountered inside an element, consume everything up to the parent's closing tag and emit it via the SAX characters callback as a text node, matching Windows MSXML behavior, where the content after the embedded <?xml?> is treated as text. This is needed for applications like the Adobe Creative Cloud installer that use embedded XML declarations in their data. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10225
From: Filip Bakreski <bakreski03@gmail.com> Windows MSXML tolerates nested <?xml?> processing instructions inside elements, but libxml2 rejects them with XML_ERR_RESERVED_XML_NAME. When an embedded <?xml is encountered inside an element, consume everything up to the parent's closing tag and emit it via the SAX characters callback as a text node, matching Windows MSXML behavior where the content after the embedded <?xml?> is treated as text. This is needed for applications like Adobe Creative Cloud installer that use embedded XML declarations in their data. --- libs/xml2/parser.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/libs/xml2/parser.c b/libs/xml2/parser.c index 3e8a588f536..6c0d7997ce2 100644 --- a/libs/xml2/parser.c +++ b/libs/xml2/parser.c @@ -5220,8 +5220,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { int i; if ((name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') && (name[3] == 0)) { - xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, - "XML declaration allowed only at the start of the document\n"); + /* Wine: Windows MSXML tolerates embedded XML declarations, handled in xmlParsePI */ return(name); } else if (name[3] == 0) { xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); @@ -5345,6 +5344,82 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { */ target = xmlParsePITarget(ctxt); if (target != NULL) { + /* Wine: Windows MSXML tolerates embedded XML declarations inside elements. 
*/ + if ((target[0] == 'x') && (target[1] == 'm') && + (target[2] == 'l') && (target[3] == 0)) { + xmlChar *text; + size_t textlen = 0; + size_t textsize = 1024; + int nesting = 0; + + text = (xmlChar *) xmlMallocAtomic(textsize); + if (text == NULL) { + xmlErrMemory(ctxt, NULL); + ctxt->instate = state; + return; + } + + /* Start with "<?xml" */ + memcpy(text, "<?xml", 5); + textlen = 5; + + /* Consume everything until parent's close tag, tracking nesting */ + while (RAW != 0) { + /* Check for close tag </ */ + if (RAW == '<' && NXT(1) == '/') { + if (nesting == 0) { + /* This is the parent's close tag - stop here */ + break; + } + nesting--; + } + /* Check for start tag < followed by letter (not <? or <! or </) */ + else if (RAW == '<' && NXT(1) != '?' && NXT(1) != '!' && NXT(1) != '/') { + xmlChar c = NXT(1); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + /* Could be start tag - check if self-closing */ + const xmlChar *p = ctxt->input->cur + 1; + int is_selfclose = 0; + while (*p && *p != '>') { + if (*p == '/' && *(p+1) == '>') { + is_selfclose = 1; + break; + } + p++; + } + if (!is_selfclose) + nesting++; + } + } + + /* Grow buffer if needed */ + if (textlen + 2 >= textsize) { + xmlChar *tmp; + textsize *= 2; + tmp = (xmlChar *) xmlRealloc(text, textsize); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + xmlFree(text); + ctxt->instate = state; + return; + } + text = tmp; + } + text[textlen++] = RAW; + NEXT; + } + text[textlen] = 0; + + /* Emit as text content (like CDATA) */ + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, text, textlen); + + xmlFree(text); + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; + return; + } if ((RAW == '?') && (NXT(1) == '>')) { if (inputid != ctxt->input->id) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10225
From: Filip Bakreski <bakreski03@gmail.com> --- dlls/msxml3/tests/domdoc.c | 200 +++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 9c16436bc1c..3944ec81088 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -14472,6 +14472,205 @@ static void test_indent(void) SysFreeString(str); } +static void test_embedded_xml_declaration(void) +{ + IXMLDOMDocument *doc; + IXMLDOMElement *elem; + IXMLDOMNode *node; + IXMLDOMNodeList *nodes; + BSTR str; + VARIANT_BOOL b; + HRESULT hr; + LONG len; + + /* Test XML with embedded <?xml?> declaration inside an element. + * Windows MSXML tolerates this but libxml2 rejects it. + * The implementation wraps such content in CDATA to make it parse. */ + static const char embedded_xml_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <xmldata><?xml version=\"1.0\"?><nested>content</nested></xmldata>" + "</root>"; + + /* Test with xml:space preserved content containing XML declaration */ + static const char embedded_xml_space_str[] = + "<?xml version=\"1.0\"?>" + "<root xml:space=\"preserve\">" + " <?xml version=\"1.0\"?><data>test</data>" + "</root>"; + + /* Test normal XML without embedded declarations (should still work) */ + static const char normal_xml_str[] = + "<?xml version=\"1.0\"?>" + "<root><child>text</child></root>"; + + /* Test *XMLData element pattern - element content that should be wrapped */ + static const char xmldata_element_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <CustomXMLData><item>value</item></CustomXMLData>" + "</root>"; + + /* Test multiple embedded declarations */ + static const char multi_embedded_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <first><?xml version=\"1.0\"?><a>1</a></first>" + " <second><b>2</b></second>" + "</root>"; + + /* Test deeply nested embedded declaration */ + static const char deep_embedded_str[] = + "<?xml version=\"1.0\"?>" + 
"<root><level1><level2><data><?xml version=\"1.0\"?><deep>nested</deep></data></level2></level1></root>"; + + /* Test with encoding in embedded declaration */ + static const char embedded_with_encoding_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <xmldata><?xml version=\"1.0\" encoding=\"UTF-8\"?><test>encoded</test></xmldata>" + "</root>"; + + /* Test self-closing XMLData element (should not need wrapping) */ + static const char selfclose_xmldata_str[] = + "<?xml version=\"1.0\"?>" + "<root><EmptyXMLData/></root>"; + + doc = NULL; + hr = CoCreateInstance(&CLSID_DOMDocument30, NULL, CLSCTX_INPROC_SERVER, + &IID_IXMLDOMDocument, (void**)&doc); + if (hr != S_OK) + { + win_skip("DOMDocument30 not available, skipping embedded XML tests\n"); + return; + } + + /* Test 1: Normal XML should parse fine */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(normal_xml_str), &b); + ok(hr == S_OK, "loadXML failed: %#lx\n", hr); + ok(b == VARIANT_TRUE, "failed to load normal XML\n"); + + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); + if (elem) + IXMLDOMElement_Release(elem); + + /* Test 2: XML with embedded declaration in element content */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_xml_str), &b); + ok(hr == S_OK, "loadXML with embedded XML declaration failed: %#lx\n", hr); + ok(b == VARIANT_TRUE, "failed to load XML with embedded declaration\n"); + + if (b == VARIANT_TRUE) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); + if (elem) + IXMLDOMElement_Release(elem); + } + + /* Test 3: XML with embedded declaration and xml:space */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_xml_space_str), &b); + ok(hr == S_OK, "loadXML with embedded XML and xml:space failed: %#lx\n", hr); + ok(b == VARIANT_TRUE, "failed to load XML with embedded declaration and xml:space\n"); + + 
/* Test 4: *XMLData element with element content */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(xmldata_element_str), &b); + ok(hr == S_OK, "loadXML with *XMLData element failed: %#lx\n", hr); + ok(b == VARIANT_TRUE, "failed to load XML with *XMLData element\n"); + + if (b == VARIANT_TRUE) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); + if (elem) + { + /* Verify we can access child elements */ + hr = IXMLDOMElement_get_childNodes(elem, &nodes); + ok(hr == S_OK, "get_childNodes failed: %#lx\n", hr); + if (nodes) + { + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "get_length failed: %#lx\n", hr); + ok(len > 0, "expected child nodes, got %ld\n", len); + IXMLDOMNodeList_Release(nodes); + } + IXMLDOMElement_Release(elem); + } + } + + /* Test 5: Multiple embedded declarations in different elements */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(multi_embedded_str), &b); + ok(hr == S_OK, "loadXML with multiple embedded declarations failed: %#lx\n", hr); + ok(b == VARIANT_TRUE, "failed to load XML with multiple embedded declarations\n"); + + if (b == VARIANT_TRUE) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); + if (elem) + { + hr = IXMLDOMElement_get_childNodes(elem, &nodes); + ok(hr == S_OK, "get_childNodes failed: %#lx\n", hr); + if (nodes) + { + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "get_length failed: %#lx\n", hr); + /* Should have at least 2 child elements (first and second) */ + ok(len >= 2, "expected at least 2 child nodes, got %ld\n", len); + IXMLDOMNodeList_Release(nodes); + } + IXMLDOMElement_Release(elem); + } + } + + /* Test 6: Deeply nested embedded declaration */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(deep_embedded_str), &b); + ok(hr == S_OK, "loadXML with deeply nested embedded declaration failed: %#lx\n", 
hr); + ok(b == VARIANT_TRUE, "failed to load XML with deeply nested embedded declaration\n"); + + /* Test 7: Embedded declaration with encoding attribute */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_with_encoding_str), &b); + ok(hr == S_OK, "loadXML with embedded encoding declaration failed: %#lx\n", hr); + ok(b == VARIANT_TRUE, "failed to load XML with embedded encoding declaration\n"); + + /* Test 8: Self-closing XMLData element (no content to wrap) */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(selfclose_xmldata_str), &b); + ok(hr == S_OK, "loadXML with self-closing XMLData failed: %#lx\n", hr); + ok(b == VARIANT_TRUE, "failed to load XML with self-closing XMLData\n"); + + if (b == VARIANT_TRUE) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); + if (elem) + { + hr = IXMLDOMElement_get_tagName(elem, &str); + ok(hr == S_OK, "get_tagName failed: %#lx\n", hr); + ok(!lstrcmpW(str, L"root"), "unexpected tag name: %s\n", wine_dbgstr_w(str)); + SysFreeString(str); + + /* Find the EmptyXMLData element */ + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("EmptyXMLData"), &node); + ok(hr == S_OK, "selectSingleNode failed: %#lx\n", hr); + if (node) + IXMLDOMNode_Release(node); + + IXMLDOMElement_Release(elem); + } + } + + IXMLDOMDocument_Release(doc); + free_bstrs(); +} + static DWORD WINAPI new_thread(void *arg) { HRESULT hr = CoInitialize(NULL); @@ -14596,6 +14795,7 @@ START_TEST(domdoc) test_xsltext(); test_max_element_depth_values(); test_get_parentNode(); + test_embedded_xml_declaration(); if (is_clsid_supported(&CLSID_MXNamespaceManager40, &IID_IMXNamespaceManager)) { -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10225
From: lucas persson <lucas.persson@uc.se> Address review feedback: - Remove the content-consumption approach in xmlParsePI that treated everything after <?xml?> as text. Instead, just let libxml2 handle <?xml ...?> as a regular processing instruction, which matches Windows MSXML behavior more closely. - Rewrite tests to validate DOM structure: check node types (NODE_PROCESSING_INSTRUCTION, NODE_ELEMENT), verify text content, and assert exact child node counts. --- dlls/msxml3/tests/domdoc.c | 248 ++++++++++++++++++++++++------------- libs/xml2/parser.c | 79 +----------- 2 files changed, 163 insertions(+), 164 deletions(-) diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 3944ec81088..a0b465f9bca 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -14476,41 +14476,28 @@ static void test_embedded_xml_declaration(void) { IXMLDOMDocument *doc; IXMLDOMElement *elem; - IXMLDOMNode *node; + IXMLDOMNode *node, *child; IXMLDOMNodeList *nodes; BSTR str; VARIANT_BOOL b; HRESULT hr; LONG len; + DOMNodeType type; /* Test XML with embedded <?xml?> declaration inside an element. - * Windows MSXML tolerates this but libxml2 rejects it. - * The implementation wraps such content in CDATA to make it parse. */ + * Windows MSXML tolerates this - the embedded <?xml?> becomes a PI node + * and subsequent content is parsed normally. 
*/ static const char embedded_xml_str[] = "<?xml version=\"1.0\"?>" "<root>" " <xmldata><?xml version=\"1.0\"?><nested>content</nested></xmldata>" "</root>"; - /* Test with xml:space preserved content containing XML declaration */ - static const char embedded_xml_space_str[] = - "<?xml version=\"1.0\"?>" - "<root xml:space=\"preserve\">" - " <?xml version=\"1.0\"?><data>test</data>" - "</root>"; - /* Test normal XML without embedded declarations (should still work) */ static const char normal_xml_str[] = "<?xml version=\"1.0\"?>" "<root><child>text</child></root>"; - /* Test *XMLData element pattern - element content that should be wrapped */ - static const char xmldata_element_str[] = - "<?xml version=\"1.0\"?>" - "<root>" - " <CustomXMLData><item>value</item></CustomXMLData>" - "</root>"; - /* Test multiple embedded declarations */ static const char multi_embedded_str[] = "<?xml version=\"1.0\"?>" @@ -14519,11 +14506,6 @@ static void test_embedded_xml_declaration(void) " <second><b>2</b></second>" "</root>"; - /* Test deeply nested embedded declaration */ - static const char deep_embedded_str[] = - "<?xml version=\"1.0\"?>" - "<root><level1><level2><data><?xml version=\"1.0\"?><deep>nested</deep></data></level2></level1></root>"; - /* Test with encoding in embedded declaration */ static const char embedded_with_encoding_str[] = "<?xml version=\"1.0\"?>" @@ -14531,11 +14513,6 @@ static void test_embedded_xml_declaration(void) " <xmldata><?xml version=\"1.0\" encoding=\"UTF-8\"?><test>encoded</test></xmldata>" "</root>"; - /* Test self-closing XMLData element (should not need wrapping) */ - static const char selfclose_xmldata_str[] = - "<?xml version=\"1.0\"?>" - "<root><EmptyXMLData/></root>"; - doc = NULL; hr = CoCreateInstance(&CLSID_DOMDocument30, NULL, CLSCTX_INPROC_SERVER, &IID_IXMLDOMDocument, (void**)&doc); @@ -14551,36 +14528,38 @@ static void test_embedded_xml_declaration(void) ok(hr == S_OK, "loadXML failed: %#lx\n", hr); ok(b == VARIANT_TRUE, "failed to 
load normal XML\n"); - hr = IXMLDOMDocument_get_documentElement(doc, &elem); - ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); - if (elem) - IXMLDOMElement_Release(elem); - - /* Test 2: XML with embedded declaration in element content */ - b = VARIANT_FALSE; - hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_xml_str), &b); - ok(hr == S_OK, "loadXML with embedded XML declaration failed: %#lx\n", hr); - ok(b == VARIANT_TRUE, "failed to load XML with embedded declaration\n"); - if (b == VARIANT_TRUE) { hr = IXMLDOMDocument_get_documentElement(doc, &elem); ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); if (elem) + { + hr = IXMLDOMElement_get_tagName(elem, &str); + ok(hr == S_OK, "get_tagName failed: %#lx\n", hr); + ok(!lstrcmpW(str, L"root"), "unexpected tag name: %s\n", wine_dbgstr_w(str)); + SysFreeString(str); + + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("child"), &node); + ok(hr == S_OK, "selectSingleNode failed: %#lx\n", hr); + if (node) + { + hr = IXMLDOMNode_get_text(node, &str); + ok(hr == S_OK, "get_text failed: %#lx\n", hr); + ok(!lstrcmpW(str, L"text"), "unexpected text: %s\n", wine_dbgstr_w(str)); + SysFreeString(str); + IXMLDOMNode_Release(node); + } IXMLDOMElement_Release(elem); + } } - /* Test 3: XML with embedded declaration and xml:space */ - b = VARIANT_FALSE; - hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_xml_space_str), &b); - ok(hr == S_OK, "loadXML with embedded XML and xml:space failed: %#lx\n", hr); - ok(b == VARIANT_TRUE, "failed to load XML with embedded declaration and xml:space\n"); - - /* Test 4: *XMLData element with element content */ + /* Test 2: XML with embedded <?xml?> declaration inside an element. + * The document should load and the <xmldata> element should have + * a PI node and a <nested> child element. 
*/ b = VARIANT_FALSE; - hr = IXMLDOMDocument_loadXML(doc, _bstr_(xmldata_element_str), &b); - ok(hr == S_OK, "loadXML with *XMLData element failed: %#lx\n", hr); - ok(b == VARIANT_TRUE, "failed to load XML with *XMLData element\n"); + hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_xml_str), &b); + ok(hr == S_OK, "loadXML with embedded XML declaration failed: %#lx\n", hr); + ok(b == VARIANT_TRUE, "failed to load XML with embedded declaration\n"); if (b == VARIANT_TRUE) { @@ -14588,21 +14567,75 @@ static void test_embedded_xml_declaration(void) ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); if (elem) { - /* Verify we can access child elements */ - hr = IXMLDOMElement_get_childNodes(elem, &nodes); - ok(hr == S_OK, "get_childNodes failed: %#lx\n", hr); - if (nodes) + /* Find the <xmldata> element */ + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("xmldata"), &node); + ok(hr == S_OK, "selectSingleNode(xmldata) failed: %#lx\n", hr); + if (node) { - hr = IXMLDOMNodeList_get_length(nodes, &len); - ok(hr == S_OK, "get_length failed: %#lx\n", hr); - ok(len > 0, "expected child nodes, got %ld\n", len); - IXMLDOMNodeList_Release(nodes); + /* Check child nodes of <xmldata> */ + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "get_childNodes failed: %#lx\n", hr); + if (nodes) + { + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "get_length failed: %#lx\n", hr); + ok(len == 2, "expected 2 child nodes, got %ld\n", len); + + /* First child should be the PI node */ + if (len >= 1) + { + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "get_item(0) failed: %#lx\n", hr); + if (child) + { + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "get_nodeType failed: %#lx\n", hr); + ok(type == NODE_PROCESSING_INSTRUCTION, + "expected NODE_PROCESSING_INSTRUCTION (%d), got %d\n", + NODE_PROCESSING_INSTRUCTION, type); + + hr = IXMLDOMNode_get_nodeName(child, &str); + ok(hr == S_OK, "get_nodeName failed: %#lx\n", hr); 
+ ok(!lstrcmpW(str, L"xml"), "unexpected PI name: %s\n", wine_dbgstr_w(str)); + SysFreeString(str); + IXMLDOMNode_Release(child); + } + } + + /* Second child should be the <nested> element */ + if (len >= 2) + { + hr = IXMLDOMNodeList_get_item(nodes, 1, &child); + ok(hr == S_OK, "get_item(1) failed: %#lx\n", hr); + if (child) + { + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "get_nodeType failed: %#lx\n", hr); + ok(type == NODE_ELEMENT, + "expected NODE_ELEMENT (%d), got %d\n", + NODE_ELEMENT, type); + + hr = IXMLDOMNode_get_nodeName(child, &str); + ok(hr == S_OK, "get_nodeName failed: %#lx\n", hr); + ok(!lstrcmpW(str, L"nested"), "unexpected node name: %s\n", wine_dbgstr_w(str)); + SysFreeString(str); + + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "get_text failed: %#lx\n", hr); + ok(!lstrcmpW(str, L"content"), "unexpected text: %s\n", wine_dbgstr_w(str)); + SysFreeString(str); + IXMLDOMNode_Release(child); + } + } + IXMLDOMNodeList_Release(nodes); + } + IXMLDOMNode_Release(node); } IXMLDOMElement_Release(elem); } } - /* Test 5: Multiple embedded declarations in different elements */ + /* Test 3: Multiple embedded declarations in different elements */ b = VARIANT_FALSE; hr = IXMLDOMDocument_loadXML(doc, _bstr_(multi_embedded_str), &b); ok(hr == S_OK, "loadXML with multiple embedded declarations failed: %#lx\n", hr); @@ -14614,55 +14647,96 @@ static void test_embedded_xml_declaration(void) ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); if (elem) { - hr = IXMLDOMElement_get_childNodes(elem, &nodes); - ok(hr == S_OK, "get_childNodes failed: %#lx\n", hr); - if (nodes) + /* <first> should have PI + <a> children */ + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("first"), &node); + ok(hr == S_OK, "selectSingleNode(first) failed: %#lx\n", hr); + if (node) { - hr = IXMLDOMNodeList_get_length(nodes, &len); - ok(hr == S_OK, "get_length failed: %#lx\n", hr); - /* Should have at least 2 child elements (first and second) */ - 
ok(len >= 2, "expected at least 2 child nodes, got %ld\n", len); - IXMLDOMNodeList_Release(nodes); + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "get_childNodes failed: %#lx\n", hr); + if (nodes) + { + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "get_length failed: %#lx\n", hr); + ok(len == 2, "expected 2 child nodes in <first>, got %ld\n", len); + IXMLDOMNodeList_Release(nodes); + } + IXMLDOMNode_Release(node); + } + + /* <second> should have just <b> child (no embedded decl) */ + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("second"), &node); + ok(hr == S_OK, "selectSingleNode(second) failed: %#lx\n", hr); + if (node) + { + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "get_childNodes failed: %#lx\n", hr); + if (nodes) + { + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "get_length failed: %#lx\n", hr); + ok(len == 1, "expected 1 child node in <second>, got %ld\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "get_item(0) failed: %#lx\n", hr); + if (child) + { + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "get_text failed: %#lx\n", hr); + ok(!lstrcmpW(str, L"2"), "unexpected text: %s\n", wine_dbgstr_w(str)); + SysFreeString(str); + IXMLDOMNode_Release(child); + } + IXMLDOMNodeList_Release(nodes); + } + IXMLDOMNode_Release(node); } IXMLDOMElement_Release(elem); } } - /* Test 6: Deeply nested embedded declaration */ - b = VARIANT_FALSE; - hr = IXMLDOMDocument_loadXML(doc, _bstr_(deep_embedded_str), &b); - ok(hr == S_OK, "loadXML with deeply nested embedded declaration failed: %#lx\n", hr); - ok(b == VARIANT_TRUE, "failed to load XML with deeply nested embedded declaration\n"); - - /* Test 7: Embedded declaration with encoding attribute */ + /* Test 4: Embedded declaration with encoding attribute */ b = VARIANT_FALSE; hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_with_encoding_str), &b); ok(hr == S_OK, "loadXML with embedded encoding 
declaration failed: %#lx\n", hr); ok(b == VARIANT_TRUE, "failed to load XML with embedded encoding declaration\n"); - /* Test 8: Self-closing XMLData element (no content to wrap) */ - b = VARIANT_FALSE; - hr = IXMLDOMDocument_loadXML(doc, _bstr_(selfclose_xmldata_str), &b); - ok(hr == S_OK, "loadXML with self-closing XMLData failed: %#lx\n", hr); - ok(b == VARIANT_TRUE, "failed to load XML with self-closing XMLData\n"); - if (b == VARIANT_TRUE) { hr = IXMLDOMDocument_get_documentElement(doc, &elem); ok(hr == S_OK, "get_documentElement failed: %#lx\n", hr); if (elem) { - hr = IXMLDOMElement_get_tagName(elem, &str); - ok(hr == S_OK, "get_tagName failed: %#lx\n", hr); - ok(!lstrcmpW(str, L"root"), "unexpected tag name: %s\n", wine_dbgstr_w(str)); - SysFreeString(str); - - /* Find the EmptyXMLData element */ - hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("EmptyXMLData"), &node); - ok(hr == S_OK, "selectSingleNode failed: %#lx\n", hr); + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("xmldata"), &node); + ok(hr == S_OK, "selectSingleNode(xmldata) failed: %#lx\n", hr); if (node) - IXMLDOMNode_Release(node); + { + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "get_childNodes failed: %#lx\n", hr); + if (nodes) + { + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "get_length failed: %#lx\n", hr); + ok(len == 2, "expected 2 child nodes, got %ld\n", len); + /* Second child should be <test> element with "encoded" text */ + if (len >= 2) + { + hr = IXMLDOMNodeList_get_item(nodes, 1, &child); + ok(hr == S_OK, "get_item(1) failed: %#lx\n", hr); + if (child) + { + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "get_text failed: %#lx\n", hr); + ok(!lstrcmpW(str, L"encoded"), "unexpected text: %s\n", wine_dbgstr_w(str)); + SysFreeString(str); + IXMLDOMNode_Release(child); + } + } + IXMLDOMNodeList_Release(nodes); + } + IXMLDOMNode_Release(node); + } IXMLDOMElement_Release(elem); } } diff --git a/libs/xml2/parser.c 
b/libs/xml2/parser.c index 6c0d7997ce2..c5ca3d22048 100644 --- a/libs/xml2/parser.c +++ b/libs/xml2/parser.c @@ -5220,7 +5220,8 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { int i; if ((name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') && (name[3] == 0)) { - /* Wine: Windows MSXML tolerates embedded XML declarations, handled in xmlParsePI */ + /* Wine: Windows MSXML tolerates embedded XML declarations; allow them + * to be parsed as regular processing instructions. */ return(name); } else if (name[3] == 0) { xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); @@ -5344,82 +5345,6 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { */ target = xmlParsePITarget(ctxt); if (target != NULL) { - /* Wine: Windows MSXML tolerates embedded XML declarations inside elements. */ - if ((target[0] == 'x') && (target[1] == 'm') && - (target[2] == 'l') && (target[3] == 0)) { - xmlChar *text; - size_t textlen = 0; - size_t textsize = 1024; - int nesting = 0; - - text = (xmlChar *) xmlMallocAtomic(textsize); - if (text == NULL) { - xmlErrMemory(ctxt, NULL); - ctxt->instate = state; - return; - } - - /* Start with "<?xml" */ - memcpy(text, "<?xml", 5); - textlen = 5; - - /* Consume everything until parent's close tag, tracking nesting */ - while (RAW != 0) { - /* Check for close tag </ */ - if (RAW == '<' && NXT(1) == '/') { - if (nesting == 0) { - /* This is the parent's close tag - stop here */ - break; - } - nesting--; - } - /* Check for start tag < followed by letter (not <? or <! or </) */ - else if (RAW == '<' && NXT(1) != '?' && NXT(1) != '!' 
&& NXT(1) != '/') { - xmlChar c = NXT(1); - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { - /* Could be start tag - check if self-closing */ - const xmlChar *p = ctxt->input->cur + 1; - int is_selfclose = 0; - while (*p && *p != '>') { - if (*p == '/' && *(p+1) == '>') { - is_selfclose = 1; - break; - } - p++; - } - if (!is_selfclose) - nesting++; - } - } - - /* Grow buffer if needed */ - if (textlen + 2 >= textsize) { - xmlChar *tmp; - textsize *= 2; - tmp = (xmlChar *) xmlRealloc(text, textsize); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFree(text); - ctxt->instate = state; - return; - } - text = tmp; - } - text[textlen++] = RAW; - NEXT; - } - text[textlen] = 0; - - /* Emit as text content (like CDATA) */ - if ((ctxt->sax) && (!ctxt->disableSAX) && - (ctxt->sax->characters != NULL)) - ctxt->sax->characters(ctxt->userData, text, textlen); - - xmlFree(text); - if (ctxt->instate != XML_PARSER_EOF) - ctxt->instate = state; - return; - } if ((RAW == '?') && (NXT(1) == '>')) { if (inputid != ctxt->input->id) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10225
participants (3)
-
Filip Bakreski -
lucas (@luben93) -
lucas persson