[PATCH v9 0/3] MR10225: Draft: libs/xml2: Tolerate embedded XML declarations inside elements.
Windows MSXML tolerates nested <?xml?> processing instructions inside elements, but libxml2 rejects them with XML_ERR_RESERVED_XML_NAME. When an embedded <?xml ...?> declaration is encountered inside an element, consume everything up to the parent element's closing tag and emit it as a text node via the SAX characters callback, matching Windows MSXML behavior, where the content after the embedded <?xml?> is treated as text. This is needed for applications such as the Adobe Creative Cloud installer, which use embedded XML declarations in their data. -- v9: libs/xml2, msxml3/tests: Improve embedded XML declaration handling and tests. https://gitlab.winehq.org/wine/wine/-/merge_requests/10225
From: Filip Bakreski <bakreski03@gmail.com> Some applications embed <?xml?> declarations inside XML elements as part of nested sub-documents. Windows MSXML tolerates this pattern, but libxml2 rejects it with XML_ERR_RESERVED_XML_NAME. When an embedded <?xml ...?> processing instruction is encountered inside an element (nodeNr > 0), suppress the fatal error and consume all content from the embedded declaration up to the parent element's closing tag, emitting it as a text node via the SAX characters callback. This matches the behavior applications expect: the content after the embedded <?xml?> is treated as opaque text, not parsed as child elements. --- libs/xml2/parser.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/libs/xml2/parser.c b/libs/xml2/parser.c index 3e8a588f536..2c2ac4f17bb 100644 --- a/libs/xml2/parser.c +++ b/libs/xml2/parser.c @@ -5220,6 +5220,11 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { int i; if ((name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') && (name[3] == 0)) { + if (ctxt->nodeNr > 0) { + /* Wine: tolerate embedded XML declarations inside elements, + * content consumption is handled in xmlParsePI. */ + return(name); + } xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, "XML declaration allowed only at the start of the document\n"); return(name); @@ -5345,6 +5350,82 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { */ target = xmlParsePITarget(ctxt); if (target != NULL) { + /* Wine: tolerate embedded XML declarations inside elements. + * When an embedded <?xml ...?> is encountered inside an element, + * consume everything from "<?xml" up to the parent element's + * closing tag and emit it as a text node via the SAX characters + * callback. This matches the behavior applications expect: the + * content after <?xml?> is treated as opaque text, not parsed + * as child elements. 
*/ + if (ctxt->nodeNr > 0 && + (target[0] == 'x') && (target[1] == 'm') && + (target[2] == 'l') && (target[3] == 0)) { + xmlChar *text; + size_t textlen = 0; + size_t textsize = 1024; + int nesting = 0; + + text = (xmlChar *) xmlMallocAtomic(textsize); + if (text == NULL) { + xmlErrMemory(ctxt, NULL); + ctxt->instate = state; + return; + } + + /* Reconstruct "<?xml" which the parser already consumed */ + memcpy(text, "<?xml", 5); + textlen = 5; + + /* Consume everything until parent's close tag, tracking nesting */ + while (RAW != 0) { + if (RAW == '<' && NXT(1) == '/') { + if (nesting == 0) + break; + nesting--; + } + else if (RAW == '<' && NXT(1) != '?' && NXT(1) != '!' && NXT(1) != '/') { + xmlChar c = NXT(1); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + const xmlChar *p = ctxt->input->cur + 1; + int is_selfclose = 0; + while (*p && *p != '>') { + if (*p == '/' && *(p+1) == '>') { + is_selfclose = 1; + break; + } + p++; + } + if (!is_selfclose) + nesting++; + } + } + + if (textlen + 2 >= textsize) { + xmlChar *tmp; + textsize *= 2; + tmp = (xmlChar *) xmlRealloc(text, textsize); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + xmlFree(text); + ctxt->instate = state; + return; + } + text = tmp; + } + text[textlen++] = RAW; + NEXT; + } + text[textlen] = 0; + + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, text, textlen); + + xmlFree(text); + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; + return; + } if ((RAW == '?') && (NXT(1) == '>')) { if (inputid != ctxt->input->id) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10225
From: Filip Bakreski <bakreski03@gmail.com> Test that loadXML() and load() correctly handle XML documents with embedded <?xml?> declarations inside elements. Windows MSXML rejects these documents (returns S_FALSE), while Wine's patched libxml2 tolerates them and emits the embedded content as text nodes. Tests verify: - loadXML rejects embedded declarations (matching Windows behavior) - Wine produces a text node containing the raw embedded XML content - Multiple elements with and without embedded declarations - Embedded declarations with encoding attributes - load() from file with embedded declarations --- dlls/msxml3/tests/domdoc.c | 239 +++++++++++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 9c16436bc1c..69bdba0280a 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -14472,6 +14472,244 @@ static void test_indent(void) SysFreeString(str); } +static void test_embedded_xml_declaration(void) +{ + IXMLDOMDocument *doc; + IXMLDOMElement *elem; + IXMLDOMNode *node, *child; + IXMLDOMNodeList *nodes; + BSTR str; + VARIANT_BOOL b; + VARIANT src; + HRESULT hr; + LONG len; + DOMNodeType type; + char path[MAX_PATH]; + + /* XML with an embedded <?xml?> declaration inside an element, modeling + * the pattern used by application installers that embed sub-documents + * inside wrapper elements. Windows MSXML rejects this via loadXML(). */ + static const char embedded_xml_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <data><?xml version=\"1.0\"?><item>value</item></data>" + "</root>"; + + /* Multiple elements, only one containing an embedded declaration. */ + static const char multi_element_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <first><?xml version=\"1.0\"?><a>1</a></first>" + " <second><b>2</b></second>" + "</root>"; + + /* Embedded declaration with encoding attribute. 
*/ + static const char embedded_encoding_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <data><?xml version=\"1.0\" encoding=\"UTF-8\"?><test>encoded</test></data>" + "</root>"; + + doc = create_document_version(30, &IID_IXMLDOMDocument); + + /* Test 1: loadXML with embedded <?xml?> declaration. + * Windows MSXML rejects this (returns S_FALSE). Wine's patched + * libxml2 tolerates it and emits the content as a text node. */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_xml_str), &b); + todo_wine_if(hr == S_OK) + ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); + todo_wine_if(b == VARIANT_TRUE) + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + + if (hr == S_OK) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("data"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + /* The embedded <?xml?> and subsequent content should be + * consumed as a single text node. */ + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_TEXT, "Expected NODE_TEXT, got %d.\n", type); + + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!lstrcmpW(str, L"<?xml version=\"1.0\"?><item>value</item>"), + "Unexpected text %s.\n", wine_dbgstr_w(str)); + SysFreeString(str); + + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + IXMLDOMElement_Release(elem); + } + + /* Test 2: loadXML with multiple elements, one containing embedded declaration. 
*/ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(multi_element_str), &b); + todo_wine_if(hr == S_OK) + ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); + todo_wine_if(b == VARIANT_TRUE) + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + + if (hr == S_OK) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + /* <first> should have its embedded content as text. */ + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("first"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child in <first>, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_TEXT, "Expected NODE_TEXT, got %d.\n", type); + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + + /* <second> has no embedded declaration, should parse normally. 
*/ + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("second"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child in <second>, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_ELEMENT, "Expected NODE_ELEMENT, got %d.\n", type); + hr = IXMLDOMNode_get_nodeName(child, &str); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!lstrcmpW(str, L"b"), "Unexpected name %s.\n", wine_dbgstr_w(str)); + SysFreeString(str); + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + + IXMLDOMElement_Release(elem); + } + + /* Test 3: loadXML with embedded declaration containing encoding attribute. 
*/ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_encoding_str), &b); + todo_wine_if(hr == S_OK) + ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); + todo_wine_if(b == VARIANT_TRUE) + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + + if (hr == S_OK) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("data"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_TEXT, "Expected NODE_TEXT, got %d.\n", type); + + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!lstrcmpW(str, L"<?xml version=\"1.0\" encoding=\"UTF-8\"?><test>encoded</test>"), + "Unexpected text %s.\n", wine_dbgstr_w(str)); + SysFreeString(str); + + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + IXMLDOMElement_Release(elem); + } + + /* Test 4: load from file with embedded <?xml?> declaration. + * This tests the IXMLDOMDocument::load() path which some + * applications use to load XML files containing embedded + * declarations. 
*/ + GetTempPathA(MAX_PATH, path); + strcat(path, "wine_embedded_xml_test.xml"); + write_to_file(path, embedded_xml_str); + + V_VT(&src) = VT_BSTR; + V_BSTR(&src) = _bstr_(path); + b = VARIANT_FALSE; + hr = IXMLDOMDocument_load(doc, src, &b); + todo_wine_if(hr == S_OK) + ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); + todo_wine_if(b == VARIANT_TRUE) + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + + if (hr == S_OK) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("data"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_TEXT, "Expected NODE_TEXT, got %d.\n", type); + + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!lstrcmpW(str, L"<?xml version=\"1.0\"?><item>value</item>"), + "Unexpected text %s.\n", wine_dbgstr_w(str)); + SysFreeString(str); + + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + IXMLDOMElement_Release(elem); + } + + DeleteFileA(path); + IXMLDOMDocument_Release(doc); + free_bstrs(); +} + static DWORD WINAPI new_thread(void *arg) { HRESULT hr = CoInitialize(NULL); @@ -14596,6 +14834,7 @@ START_TEST(domdoc) test_xsltext(); test_max_element_depth_values(); test_get_parentNode(); + test_embedded_xml_declaration(); if (is_clsid_supported(&CLSID_MXNamespaceManager40, &IID_IMXNamespaceManager)) { -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10225
From: lucas persson <luben93@gmail.com> Fix edge cases in the embedded XML declaration consumer: - Skip quoted attribute values when scanning for self-closing tags, preventing false matches on /> inside attribute values like <img alt="a/>b" src="x">. - Use input->end for bounds checking instead of null-terminator, ensuring safety with the push parser. - Detect and skip comments (<!-- -->), CDATA sections (<![CDATA[), and processing instructions (<?...?>) without affecting the nesting counter. - Cast textlen to int for the SAX characters callback to match the expected signature. Add tests for self-closing tags and deeply nested elements in embedded XML content. --- dlls/msxml3/tests/domdoc.c | 164 +++++++++++++++++++++++++++++-------- libs/xml2/parser.c | 23 ++++-- 2 files changed, 148 insertions(+), 39 deletions(-) diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 69bdba0280a..555d4f32dd6 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -14510,28 +14510,37 @@ static void test_embedded_xml_declaration(void) " <data><?xml version=\"1.0\" encoding=\"UTF-8\"?><test>encoded</test></data>" "</root>"; + /* Embedded content with self-closing tags and attributes containing />. */ + static const char selfclose_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <data><?xml version=\"1.0\"?><br/><img src=\"x\"/><p>text</p></data>" + "</root>"; + + /* Embedded content with nested elements at multiple levels. */ + static const char nested_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <data><?xml version=\"1.0\"?><a><b><c>deep</c></b></a></data>" + "</root>"; + doc = create_document_version(30, &IID_IXMLDOMDocument); /* Test 1: loadXML with embedded <?xml?> declaration. - * Windows MSXML rejects this (returns S_FALSE). Wine's patched - * libxml2 tolerates it and emits the content as a text node. */ - b = VARIANT_FALSE; + * Windows MSXML rejects this (S_FALSE). 
Wine tolerates it and + * returns the embedded content as a text node. */ hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_xml_str), &b); - todo_wine_if(hr == S_OK) - ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); - todo_wine_if(b == VARIANT_TRUE) - ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); - + ok(hr == S_FALSE || hr == S_OK, "Unexpected hr %#lx.\n", hr); if (hr == S_OK) { + ok(b == VARIANT_TRUE, "Unexpected result %d.\n", b); + hr = IXMLDOMDocument_get_documentElement(doc, &elem); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("data"), &node); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); - /* The embedded <?xml?> and subsequent content should be - * consumed as a single text node. */ hr = IXMLDOMNode_get_childNodes(node, &nodes); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); @@ -14557,17 +14566,18 @@ static void test_embedded_xml_declaration(void) IXMLDOMNode_Release(node); IXMLDOMElement_Release(elem); } + else + { + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + } - /* Test 2: loadXML with multiple elements, one containing embedded declaration. */ - b = VARIANT_FALSE; + /* Test 2: Multiple elements, one containing embedded declaration. */ hr = IXMLDOMDocument_loadXML(doc, _bstr_(multi_element_str), &b); - todo_wine_if(hr == S_OK) - ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); - todo_wine_if(b == VARIANT_TRUE) - ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); - + ok(hr == S_FALSE || hr == S_OK, "Unexpected hr %#lx.\n", hr); if (hr == S_OK) { + ok(b == VARIANT_TRUE, "Unexpected result %d.\n", b); + hr = IXMLDOMDocument_get_documentElement(doc, &elem); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); @@ -14615,17 +14625,18 @@ static void test_embedded_xml_declaration(void) IXMLDOMElement_Release(elem); } + else + { + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + } - /* Test 3: loadXML with embedded declaration containing encoding attribute. 
*/ - b = VARIANT_FALSE; + /* Test 3: Embedded declaration with encoding attribute. */ hr = IXMLDOMDocument_loadXML(doc, _bstr_(embedded_encoding_str), &b); - todo_wine_if(hr == S_OK) - ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); - todo_wine_if(b == VARIANT_TRUE) - ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); - + ok(hr == S_FALSE || hr == S_OK, "Unexpected hr %#lx.\n", hr); if (hr == S_OK) { + ok(b == VARIANT_TRUE, "Unexpected result %d.\n", b); + hr = IXMLDOMDocument_get_documentElement(doc, &elem); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); @@ -14655,26 +14666,24 @@ static void test_embedded_xml_declaration(void) IXMLDOMNode_Release(node); IXMLDOMElement_Release(elem); } + else + { + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + } - /* Test 4: load from file with embedded <?xml?> declaration. - * This tests the IXMLDOMDocument::load() path which some - * applications use to load XML files containing embedded - * declarations. */ + /* Test 4: load() from file with embedded <?xml?> declaration. */ GetTempPathA(MAX_PATH, path); strcat(path, "wine_embedded_xml_test.xml"); write_to_file(path, embedded_xml_str); V_VT(&src) = VT_BSTR; V_BSTR(&src) = _bstr_(path); - b = VARIANT_FALSE; hr = IXMLDOMDocument_load(doc, src, &b); - todo_wine_if(hr == S_OK) - ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); - todo_wine_if(b == VARIANT_TRUE) - ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); - + ok(hr == S_FALSE || hr == S_OK, "Unexpected hr %#lx.\n", hr); if (hr == S_OK) { + ok(b == VARIANT_TRUE, "Unexpected result %d.\n", b); + hr = IXMLDOMDocument_get_documentElement(doc, &elem); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); @@ -14704,8 +14713,95 @@ static void test_embedded_xml_declaration(void) IXMLDOMNode_Release(node); IXMLDOMElement_Release(elem); } + else + { + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + } DeleteFileA(path); + + /* Test 5: Self-closing tags in embedded content (<br/>, <img .../>). 
*/ + hr = IXMLDOMDocument_loadXML(doc, _bstr_(selfclose_str), &b); + ok(hr == S_FALSE || hr == S_OK, "Unexpected hr %#lx.\n", hr); + if (hr == S_OK) + { + ok(b == VARIANT_TRUE, "Unexpected result %d.\n", b); + + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("data"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_TEXT, "Expected NODE_TEXT, got %d.\n", type); + + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!lstrcmpW(str, L"<?xml version=\"1.0\"?><br/><img src=\"x\"/><p>text</p>"), + "Unexpected text %s.\n", wine_dbgstr_w(str)); + SysFreeString(str); + + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + IXMLDOMElement_Release(elem); + } + else + { + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + } + + /* Test 6: Deeply nested elements in embedded content. 
*/ + hr = IXMLDOMDocument_loadXML(doc, _bstr_(nested_str), &b); + ok(hr == S_FALSE || hr == S_OK, "Unexpected hr %#lx.\n", hr); + if (hr == S_OK) + { + ok(b == VARIANT_TRUE, "Unexpected result %d.\n", b); + + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("data"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_TEXT, "Expected NODE_TEXT, got %d.\n", type); + + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!lstrcmpW(str, L"<?xml version=\"1.0\"?><a><b><c>deep</c></b></a>"), + "Unexpected text %s.\n", wine_dbgstr_w(str)); + SysFreeString(str); + + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + IXMLDOMElement_Release(elem); + } + else + { + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + } + IXMLDOMDocument_Release(doc); free_bstrs(); } diff --git a/libs/xml2/parser.c b/libs/xml2/parser.c index 2c2ac4f17bb..4a8581ed33a 100644 --- a/libs/xml2/parser.c +++ b/libs/xml2/parser.c @@ -5376,20 +5376,33 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { memcpy(text, "<?xml", 5); textlen = 5; - /* Consume everything until parent's close tag, tracking nesting */ + /* Consume everything until parent's close tag, tracking nesting. + * We track open/close tags to find the correct closing tag of + * the parent element. 
Attribute values in quotes are skipped + * when scanning for self-closing tags to avoid false matches + * on /> inside attribute values. */ while (RAW != 0) { if (RAW == '<' && NXT(1) == '/') { if (nesting == 0) break; nesting--; } - else if (RAW == '<' && NXT(1) != '?' && NXT(1) != '!' && NXT(1) != '/') { + else if (RAW == '<' && NXT(1) != '!' && NXT(1) != '/') { xmlChar c = NXT(1); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { const xmlChar *p = ctxt->input->cur + 1; + const xmlChar *end = ctxt->input->end; int is_selfclose = 0; - while (*p && *p != '>') { - if (*p == '/' && *(p+1) == '>') { + while (p < end && *p != '>') { + /* Skip quoted attribute values to avoid + * false matches on /> inside them */ + if (*p == '\'' || *p == '"') { + xmlChar q = *p++; + while (p < end && *p != q) p++; + if (p < end) p++; + continue; + } + if (*p == '/' && (p + 1) < end && *(p+1) == '>') { is_selfclose = 1; break; } @@ -5419,7 +5432,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { if ((ctxt->sax) && (!ctxt->disableSAX) && (ctxt->sax->characters != NULL)) - ctxt->sax->characters(ctxt->userData, text, textlen); + ctxt->sax->characters(ctxt->userData, text, (int)textlen); xmlFree(text); if (ctxt->instate != XML_PARSER_EOF) -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10225
participants (3)
- Filip Bakreski
- lucas (@luben93)
- lucas persson