v4: Create special XML_ELEMENT_NODE for PI node attributes to avoid hitting a libxml2 limitation and manual properties management.
Signed-off-by: Dmitry Timoshkov dmitry@baikal.ru --- dlls/msxml3/nodemap.c | 4 +- dlls/msxml3/pi.c | 114 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 114 insertions(+), 4 deletions(-)
diff --git a/dlls/msxml3/nodemap.c b/dlls/msxml3/nodemap.c index 5ea083c4495..856f407654a 100644 --- a/dlls/msxml3/nodemap.c +++ b/dlls/msxml3/nodemap.c @@ -141,7 +141,7 @@ static ULONG WINAPI xmlnodemap_Release( if ( ref == 0 ) { xmlnode_release( This->node ); - xmldoc_release( This->node->doc ); + if (This->node->doc) xmldoc_release( This->node->doc ); if (This->enumvariant) IEnumVARIANT_Release(This->enumvariant); heap_free( This ); } @@ -452,7 +452,7 @@ IXMLDOMNamedNodeMap *create_nodemap(xmlNodePtr node, const struct nodemap_funcs init_dispex(&This->dispex, (IUnknown*)&This->IXMLDOMNamedNodeMap_iface, &xmlnodemap_dispex);
xmlnode_add_ref(node); - xmldoc_add_ref(node->doc); + if (node->doc) xmldoc_add_ref(node->doc);
return &This->IXMLDOMNamedNodeMap_iface; } diff --git a/dlls/msxml3/pi.c b/dlls/msxml3/pi.c index cea95708087..6bacff12027 100644 --- a/dlls/msxml3/pi.c +++ b/dlls/msxml3/pi.c @@ -34,6 +34,7 @@ #include "ole2.h" #include "msxml6.h"
+#include "xmlparser.h" #include "msxml_private.h"
#include "wine/debug.h" @@ -45,6 +46,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(msxml); typedef struct _dom_pi { xmlnode node; + xmlNodePtr attributes; IXMLDOMProcessingInstruction IXMLDOMProcessingInstruction_iface; LONG ref; } dom_pi; @@ -114,6 +116,7 @@ static ULONG WINAPI dom_pi_Release( TRACE("(%p)->(%d)\n", This, ref); if ( ref == 0 ) { + xmlFreeNode(This->attributes); destroy_xmlnode(&This->node); heap_free( This ); } @@ -287,6 +290,107 @@ static HRESULT WINAPI dom_pi_get_nextSibling( return node_get_next_sibling(&This->node, domNode); }
+static HRESULT xml_get_value(xmlChar **p, xmlChar **value) +{ + xmlChar *v; + int len; + + while (isspace(**p)) *p += 1; + if (**p != '=') return XML_E_MISSINGEQUALS; + *p += 1; + + while (isspace(**p)) *p += 1; + if (**p != '"') return XML_E_MISSINGQUOTE; + *p += 1; + + v = *p; + while (**p && **p != '"') *p += 1; + if (!**p) return XML_E_EXPECTINGCLOSEQUOTE; + len = *p - v; + if (!len) return XML_E_MISSINGNAME; + *p += 1; + + *value = heap_alloc(len + 1); + if (!*value) return E_OUTOFMEMORY; + memcpy(*value, v, len); + *(*value + len) = 0; + + return S_OK; +} + +static HRESULT parse_xml_decl(dom_pi *This) +{ + xmlChar *version, *encoding, *standalone, *p; + xmlAttrPtr attr; + HRESULT hr = S_OK; + + if (!This->node.node->content || This->attributes->properties) + return S_OK; + + version = encoding = standalone = NULL; + + p = This->node.node->content; + + while (*p) + { + while (isspace(*p)) p++; + if (!*p) break; + + if (!strncmp((const char *)p, "version", 7)) + { + p += 7; + if ((hr = xml_get_value(&p, &version)) != S_OK) goto fail; + } + else if (!strncmp((const char *)p, "encoding", 8)) + { + p += 8; + if ((hr = xml_get_value(&p, &encoding)) != S_OK) goto fail; + } + else if (!strncmp((const char *)p, "standalone", 10)) + { + p += 10; + if ((hr = xml_get_value(&p, &standalone)) != S_OK) goto fail; + } + else + { + FIXME("unexpected XML attribute %s\n", debugstr_a((const char *)p)); + hr = XML_E_UNEXPECTED_ATTRIBUTE; + goto fail; + } + } + + if (version) + { + attr = xmlSetNsProp(This->attributes, NULL, (const xmlChar *)"version", version); + if (!attr) + hr = E_OUTOFMEMORY; + } + if (encoding) + { + attr = xmlSetNsProp(This->attributes, NULL, (const xmlChar *)"encoding", encoding); + if (!attr) + hr = E_OUTOFMEMORY; + } + if (standalone) + { + attr = xmlSetNsProp(This->attributes, NULL, (const xmlChar *)"standalone", standalone); + if (!attr) + hr = E_OUTOFMEMORY; + } + +fail: + if (hr != S_OK) + { + xmlFreePropList(This->attributes->properties); + This->attributes->properties = NULL; + } + + heap_free(version); + heap_free(encoding); + heap_free(standalone); + return hr; +} + static HRESULT WINAPI dom_pi_get_attributes( IXMLDOMProcessingInstruction *iface, IXMLDOMNamedNodeMap** map) @@ -307,8 +411,10 @@ static HRESULT WINAPI dom_pi_get_attributes(
if (!strcmpW(name, xmlW)) { - FIXME("created dummy map for <?xml ?>\n"); - *map = create_nodemap(This->node.node, &dom_pi_attr_map); + hr = parse_xml_decl(This); + if (hr != S_OK) return S_FALSE; + + *map = create_nodemap(This->attributes, &dom_pi_attr_map); SysFreeString(name); return S_OK; } @@ -761,6 +867,10 @@ IUnknown* create_pi( xmlNodePtr pi )
This->IXMLDOMProcessingInstruction_iface.lpVtbl = &dom_pi_vtbl; This->ref = 1; + /* xmlSetProp/xmlSetNsProp accept only nodes of type XML_ELEMENT_NODE, + * so we have to create special node for attributes. + */ + This->attributes = xmlNewNode(NULL, (const xmlChar *)"attributes");
init_xmlnode(&This->node, pi, (IXMLDOMNode*)&This->IXMLDOMProcessingInstruction_iface, &dompi_dispex);
On 5/20/21 9:50 AM, Dmitry Timoshkov wrote:
@@ -45,6 +46,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(msxml); typedef struct _dom_pi { xmlnode node;
- xmlNodePtr attributes; IXMLDOMProcessingInstruction IXMLDOMProcessingInstruction_iface; LONG ref;
} dom_pi;
Doesn't this create a situation when multiple msxml pi instances could be referencing same prolog node, but with different "attributes" contents? For tree consistency everything should be linked at libxml level.
Nikolay Sivov nsivov@codeweavers.com wrote:
@@ -45,6 +46,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(msxml); typedef struct _dom_pi { xmlnode node;
- xmlNodePtr attributes; IXMLDOMProcessingInstruction IXMLDOMProcessingInstruction_iface; LONG ref;
} dom_pi;
Doesn't this create a situation when multiple msxml pi instances could be referencing same prolog node, but with different "attributes" contents? For tree consistency everything should be linked at libxml level.
That might be the case. Also, I've found that ::get_text() for such a node returns data for main PI node instead of the special attributes one.
At the mement I see only two possible solutions: keap using custom node properties code like previous version of the patch does, or replace node->type by XML_ELEMENT_NODE (like node.c,htmldoc_dumpcontent() does) before calling libxml2's xmlSetNsProp/xmlFreeNode/xmlHasProp.
I'm open for suggestions. Still, it seems to me that manual properties management is cleaner than hacking node->type.
Dmitry Timoshkov dmitry@baikal.ru wrote:
Nikolay Sivov nsivov@codeweavers.com wrote:
@@ -45,6 +46,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(msxml); typedef struct _dom_pi { xmlnode node;
- xmlNodePtr attributes; IXMLDOMProcessingInstruction IXMLDOMProcessingInstruction_iface; LONG ref;
} dom_pi;
Doesn't this create a situation when multiple msxml pi instances could be referencing same prolog node, but with different "attributes" contents? For tree consistency everything should be linked at libxml level.
That might be the case. Also, I've found that ::get_text() for such a node returns data for main PI node instead of the special attributes one.
At the mement I see only two possible solutions: keap using custom node properties code like previous version of the patch does, or replace node->type by XML_ELEMENT_NODE (like node.c,htmldoc_dumpcontent() does) before calling libxml2's xmlSetNsProp/xmlFreeNode/xmlHasProp.
I'm open for suggestions. Still, it seems to me that manual properties management is cleaner than hacking node->type.
I'm not sure how should I treat absence of your response. Also, I have no idea why you consider hacks in node.c,htmldoc_dumpcontent() with temporary patching node->type acceptable, but using legitimate node->properties storage with custom management like my previous implementation does as an abuse. I'm personally inclined to return back to v3 since it works in all cases.
Also, I'd prefer to retain 3/3 as it is, and avoid introducing different XML declaration parsers.
I'd like to see some more definitive answers/suggestions before I simply stop trying to upsteam these patches.
On 5/28/21 11:37 AM, Dmitry Timoshkov wrote:
Dmitry Timoshkov dmitry@baikal.ru wrote:
Nikolay Sivov nsivov@codeweavers.com wrote:
@@ -45,6 +46,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(msxml); typedef struct _dom_pi { xmlnode node;
- xmlNodePtr attributes; IXMLDOMProcessingInstruction IXMLDOMProcessingInstruction_iface; LONG ref;
} dom_pi;
Doesn't this create a situation when multiple msxml pi instances could be referencing same prolog node, but with different "attributes" contents? For tree consistency everything should be linked at libxml level.
That might be the case. Also, I've found that ::get_text() for such a node returns data for main PI node instead of the special attributes one.
At the mement I see only two possible solutions: keap using custom node properties code like previous version of the patch does, or replace node->type by XML_ELEMENT_NODE (like node.c,htmldoc_dumpcontent() does) before calling libxml2's xmlSetNsProp/xmlFreeNode/xmlHasProp.
I'm open for suggestions. Still, it seems to me that manual properties management is cleaner than hacking node->type.
I'm not sure how should I treat absence of your response. Also, I have no idea why you consider hacks in node.c,htmldoc_dumpcontent() with temporary patching node->type acceptable, but using legitimate node->properties storage with custom management like my previous implementation does as an abuse. I'm personally inclined to return back to v3 since it works in all cases.
Also, I'd prefer to retain 3/3 as it is, and avoid introducing different XML declaration parsers.
It doesn't have to be different, you already have a function for that. If the only thing you need is to save with correct encoding, you only need to read that encoding. Node map for prolog node is nice to have, but as you see yourself it goes against libxml2 tree management concepts, when attaching properties to something that's not an element, or creates inconsistency when you keep attributes at msxml instance, not reflected in libxml2 tree.
I'd like to see some more definitive answers/suggestions before I simply stop trying to upsteam these patches.
Saving with encoding read from prolog node was that suggestion.
Nikolay Sivov nsivov@codeweavers.com wrote:
@@ -45,6 +46,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(msxml); typedef struct _dom_pi { xmlnode node;
- xmlNodePtr attributes; IXMLDOMProcessingInstruction IXMLDOMProcessingInstruction_iface; LONG ref;
} dom_pi;
Doesn't this create a situation when multiple msxml pi instances could be referencing same prolog node, but with different "attributes" contents? For tree consistency everything should be linked at libxml level.
That might be the case. Also, I've found that ::get_text() for such a node returns data for main PI node instead of the special attributes one.
At the mement I see only two possible solutions: keap using custom node properties code like previous version of the patch does, or replace node->type by XML_ELEMENT_NODE (like node.c,htmldoc_dumpcontent() does) before calling libxml2's xmlSetNsProp/xmlFreeNode/xmlHasProp.
I'm open for suggestions. Still, it seems to me that manual properties management is cleaner than hacking node->type.
I'm not sure how should I treat absence of your response. Also, I have no idea why you consider hacks in node.c,htmldoc_dumpcontent() with temporary patching node->type acceptable, but using legitimate node->properties storage with custom management like my previous implementation does as an abuse. I'm personally inclined to return back to v3 since it works in all cases.
Also, I'd prefer to retain 3/3 as it is, and avoid introducing different XML declaration parsers.
It doesn't have to be different, you already have a function for that. If the only thing you need is to save with correct encoding, you only need to read that encoding. Node map for prolog node is nice to have, but as you see yourself it goes against libxml2 tree management concepts, when attaching properties to something that's not an element, or creates inconsistency when you keep attributes at msxml instance, not reflected in libxml2 tree.
I have an application that reads the encoding from a PI node using IXMLDOMNamedNodeMap::getNamedItem(), and in addition it does some other things similar to what the provided tests do, that's why I implemented it this way, and I'd like to see full patchset to get in.
On 5/28/21 12:06 PM, Dmitry Timoshkov wrote:
Nikolay Sivov nsivov@codeweavers.com wrote:
@@ -45,6 +46,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(msxml); typedef struct _dom_pi { xmlnode node;
- xmlNodePtr attributes; IXMLDOMProcessingInstruction IXMLDOMProcessingInstruction_iface; LONG ref;
} dom_pi;
Doesn't this create a situation when multiple msxml pi instances could be referencing same prolog node, but with different "attributes" contents? For tree consistency everything should be linked at libxml level.
That might be the case. Also, I've found that ::get_text() for such a node returns data for main PI node instead of the special attributes one.
At the mement I see only two possible solutions: keap using custom node properties code like previous version of the patch does, or replace node->type by XML_ELEMENT_NODE (like node.c,htmldoc_dumpcontent() does) before calling libxml2's xmlSetNsProp/xmlFreeNode/xmlHasProp.
I'm open for suggestions. Still, it seems to me that manual properties management is cleaner than hacking node->type.
I'm not sure how should I treat absence of your response. Also, I have no idea why you consider hacks in node.c,htmldoc_dumpcontent() with temporary patching node->type acceptable, but using legitimate node->properties storage with custom management like my previous implementation does as an abuse. I'm personally inclined to return back to v3 since it works in all cases.
Also, I'd prefer to retain 3/3 as it is, and avoid introducing different XML declaration parsers.
It doesn't have to be different, you already have a function for that. If the only thing you need is to save with correct encoding, you only need to read that encoding. Node map for prolog node is nice to have, but as you see yourself it goes against libxml2 tree management concepts, when attaching properties to something that's not an element, or creates inconsistency when you keep attributes at msxml instance, not reflected in libxml2 tree.
I have an application that reads the encoding from a PI node using IXMLDOMNamedNodeMap::getNamedItem(), and in addition it does some other things similar to what the provided tests do, that's why I implemented it this way, and I'd like to see full patchset to get in.
In that case sure, v3 is better (that's the one with 'properties' used in PI node?). Please resend that one.