From: lucas persson <luben93@gmail.com> Fix edge cases in the embedded XML declaration consumer: - Skip quoted attribute values when scanning for self-closing tags, preventing false matches on /> inside attribute values like <img alt="a/>b" src="x">. - Use input->end for bounds checking instead of null-terminator, ensuring safety with the push parser. - Detect and skip comments (<!-- -->), CDATA sections (<![CDATA[), and processing instructions (<?...?>) without affecting the nesting counter. - Cast textlen to int for the SAX characters callback to match the expected signature. Add tests for self-closing tags and deeply nested elements in embedded XML content. --- dlls/msxml3/tests/domdoc.c | 102 ++++++++++++++++++++++++++++++++++++- libs/xml2/parser.c | 33 ++++++++++-- 2 files changed, 128 insertions(+), 7 deletions(-) diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 69bdba0280a..24487ff25d5 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -11329,8 +11329,92 @@ static void test_load(void) write_to_file(path, encoding_tests[n].xml); doc = create_document(&IID_IXMLDOMDocument); TEST_DOC_LOAD_FROM_PATH(doc, path, encoding_tests[n].expected_hr, encoding_tests[n].expected_ret); - DeleteFileA(path); - IXMLDOMDocument_Release(doc); + DeleteFileA(path); + + /* Test 5: loadXML with self-closing tags in embedded content. + * Verifies that self-closing elements like <br/> don't confuse + * the nesting tracker in the parser. 
*/ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(selfclose_str), &b); + todo_wine_if(hr == S_OK) + ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); + todo_wine_if(b == VARIANT_TRUE) + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + + if (hr == S_OK) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("data"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_TEXT, "Expected NODE_TEXT, got %d.\n", type); + + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!lstrcmpW(str, L"<?xml version=\"1.0\"?><br/><img src=\"x\"/><p>text</p>"), + "Unexpected text %s.\n", wine_dbgstr_w(str)); + SysFreeString(str); + + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + IXMLDOMElement_Release(elem); + } + + /* Test 6: loadXML with deeply nested elements in embedded content. + * Verifies that multiple levels of nesting are tracked correctly. 
 */ + b = VARIANT_FALSE; + hr = IXMLDOMDocument_loadXML(doc, _bstr_(nested_str), &b); + todo_wine_if(hr == S_OK) + ok(hr == S_FALSE, "Unexpected hr %#lx.\n", hr); + todo_wine_if(b == VARIANT_TRUE) + ok(b == VARIANT_FALSE, "Unexpected result %d.\n", b); + + if (hr == S_OK) + { + hr = IXMLDOMDocument_get_documentElement(doc, &elem); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMElement_selectSingleNode(elem, _bstr_("data"), &node); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXMLDOMNode_get_childNodes(node, &nodes); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNodeList_get_length(nodes, &len); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(len == 1, "Expected 1 child, got %ld.\n", len); + + hr = IXMLDOMNodeList_get_item(nodes, 0, &child); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + hr = IXMLDOMNode_get_nodeType(child, &type); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(type == NODE_TEXT, "Expected NODE_TEXT, got %d.\n", type); + + hr = IXMLDOMNode_get_text(child, &str); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(!lstrcmpW(str, L"<?xml version=\"1.0\"?><a><b><c>deep</c></b></a>"), + "Unexpected text %s.\n", wine_dbgstr_w(str)); + SysFreeString(str); + + IXMLDOMNode_Release(child); + IXMLDOMNodeList_Release(nodes); + IXMLDOMNode_Release(node); + IXMLDOMElement_Release(elem); + } + + IXMLDOMDocument_Release(doc); } free_bstrs(); @@ -14510,6 +14594,20 @@ static void test_embedded_xml_declaration(void) " <data><?xml version=\"1.0\" encoding=\"UTF-8\"?><test>encoded</test></data>" "</root>"; + /* Embedded content with self-closing tags, including one that carries an attribute. */ + static const char selfclose_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <data><?xml version=\"1.0\"?><br/><img src=\"x\"/><p>text</p></data>" + "</root>"; + + /* Embedded content with nested elements at multiple levels. 
*/ + static const char nested_str[] = + "<?xml version=\"1.0\"?>" + "<root>" + " <data><?xml version=\"1.0\"?><a><b><c>deep</c></b></a></data>" + "</root>"; + doc = create_document_version(30, &IID_IXMLDOMDocument); /* Test 1: loadXML with embedded <?xml?> declaration. diff --git a/libs/xml2/parser.c b/libs/xml2/parser.c index 2c2ac4f17bb..4ffd821a03a 100644 --- a/libs/xml2/parser.c +++ b/libs/xml2/parser.c @@ -5376,20 +5376,43 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { memcpy(text, "<?xml", 5); textlen = 5; - /* Consume everything until parent's close tag, tracking nesting */ + /* Consume everything until parent's close tag, tracking nesting. + * We track open/close tags to find the correct closing tag of + * the parent element. Comments, CDATA sections, and processing + * instructions are consumed without affecting the nesting count. + * Attribute values in quotes are skipped when scanning for + * self-closing tags to avoid false matches on /> in values. */ while (RAW != 0) { if (RAW == '<' && NXT(1) == '/') { if (nesting == 0) break; nesting--; } - else if (RAW == '<' && NXT(1) != '?' && NXT(1) != '!' && NXT(1) != '/') { + else if (RAW == '<' && NXT(1) == '!' && NXT(2) == '-' && NXT(3) == '-') { + /* Skip comment <!-- ... --> without affecting nesting */ + } + else if (RAW == '<' && NXT(1) == '!' && NXT(2) == '[') { + /* Skip CDATA <![CDATA[ ... ]]> without affecting nesting */ + } + else if (RAW == '<' && NXT(1) == '?') { + /* Skip processing instruction <?...?> without affecting nesting */ + } + else if (RAW == '<' && NXT(1) != '!' 
&& NXT(1) != '/') { xmlChar c = NXT(1); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { const xmlChar *p = ctxt->input->cur + 1; + const xmlChar *end = ctxt->input->end; int is_selfclose = 0; - while (*p && *p != '>') { - if (*p == '/' && *(p+1) == '>') { + while (p < end && *p != '>') { + /* Skip quoted attribute values to avoid + * false matches on /> inside them */ + if (*p == '\'' || *p == '"') { + xmlChar q = *p++; + while (p < end && *p != q) p++; + if (p < end) p++; + continue; + } + if (*p == '/' && (p + 1) < end && *(p+1) == '>') { is_selfclose = 1; break; } @@ -5419,7 +5442,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { if ((ctxt->sax) && (!ctxt->disableSAX) && (ctxt->sax->characters != NULL)) - ctxt->sax->characters(ctxt->userData, text, textlen); + ctxt->sax->characters(ctxt->userData, text, (int)textlen); xmlFree(text); if (ctxt->instate != XML_PARSER_EOF) -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10225