From: lucas persson <luben93@gmail.com> Windows MSXML tolerates nested <?xml?> processing instructions inside elements, but libxml2 rejects them with XML_ERR_RESERVED_XML_NAME. When an embedded <?xml is encountered inside an element, consume everything up to the parent's closing tag and emit it via the SAX characters callback as a text node, matching Windows MSXML behavior where the content after the embedded <?xml?> is treated as text. This is needed for applications like Adobe Creative Cloud installer that use embedded XML declarations in their data. --- libs/xml2/parser.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/libs/xml2/parser.c b/libs/xml2/parser.c index 3e8a588f536..6c0d7997ce2 100644 --- a/libs/xml2/parser.c +++ b/libs/xml2/parser.c @@ -5220,8 +5220,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { int i; if ((name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') && (name[3] == 0)) { - xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, - "XML declaration allowed only at the start of the document\n"); + /* Wine: Windows MSXML tolerates embedded XML declarations, handled in xmlParsePI */ return(name); } else if (name[3] == 0) { xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); @@ -5345,6 +5344,82 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { */ target = xmlParsePITarget(ctxt); if (target != NULL) { + /* Wine: Windows MSXML tolerates embedded XML declarations inside elements. */ + if ((target[0] == 'x') && (target[1] == 'm') && + (target[2] == 'l') && (target[3] == 0)) { + xmlChar *text; + size_t textlen = 0; + size_t textsize = 1024; + int nesting = 0; + + text = (xmlChar *) xmlMallocAtomic(textsize); + if (text == NULL) { + xmlErrMemory(ctxt, NULL); + ctxt->instate = state; + return; + } + + /* Start with "<?xml" */ + memcpy(text, "<?xml", 5); + textlen = 5; + + /* Consume everything until parent's close tag, tracking nesting */ + while (RAW != 0) { + /* Check for close tag </ */ + if (RAW == '<' && NXT(1) == '/') { + if (nesting == 0) { + /* This is the parent's close tag - stop here */ + break; + } + nesting--; + } + /* Check for start tag < followed by letter (not <? or <! or </) */ + else if (RAW == '<' && NXT(1) != '?' && NXT(1) != '!' && NXT(1) != '/') { + xmlChar c = NXT(1); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + /* Could be start tag - check if self-closing */ + const xmlChar *p = ctxt->input->cur + 1; + int is_selfclose = 0; + while (*p && *p != '>') { + if (*p == '/' && *(p+1) == '>') { + is_selfclose = 1; + break; + } + p++; + } + if (!is_selfclose) + nesting++; + } + } + + /* Grow buffer if needed */ + if (textlen + 2 >= textsize) { + xmlChar *tmp; + textsize *= 2; + tmp = (xmlChar *) xmlRealloc(text, textsize); + if (tmp == NULL) { + xmlErrMemory(ctxt, NULL); + xmlFree(text); + ctxt->instate = state; + return; + } + text = tmp; + } + text[textlen++] = RAW; + NEXT; + } + text[textlen] = 0; + + /* Emit as text content (like CDATA) */ + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, text, textlen); + + xmlFree(text); + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; + return; + } if ((RAW == '?') && (NXT(1) == '>')) { if (inputid != ctxt->input->id) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10225