From: Phiality <bakreski03@gmail.com> - Revert incorrect DISPATCH_PROPERTYGET | DISPATCH_METHOD behavior that broke compatibility - Add IXMLSerializer interface implementation for mshtml - Add CDATA wrapping for embedded XML declarations in msxml3 to handle cases where Windows MSXML tolerates embedded <?xml?> declarations but libxml2 rejects them --- dlls/jscript/dispex.c | 8 +- dlls/mshtml/dispex.c | 1 - dlls/mshtml/mshtml_private.h | 5 +- dlls/mshtml/omnavigator.c | 150 +++++++++++++ dlls/mshtml/tests/dom.c | 79 ------- dlls/msxml3/domdoc.c | 418 ++++++++++++++++++++++++++++++++++- include/mshtmdid.h | 4 + include/mshtml.idl | 40 ++++ 8 files changed, 619 insertions(+), 86 deletions(-) diff --git a/dlls/jscript/dispex.c b/dlls/jscript/dispex.c index dc2ff9f6a55..f39a8cd4b12 100644 --- a/dlls/jscript/dispex.c +++ b/dlls/jscript/dispex.c @@ -2136,10 +2136,10 @@ static HRESULT WINAPI DispatchEx_InvokeEx(IWineJSDispatch *iface, DISPID id, LCI if(pspCaller) IServiceProvider_AddRef(pspCaller); - if(wFlags == (DISPATCH_METHOD | DISPATCH_PROPERTYGET)) - wFlags = (This->ctx->version < SCRIPTLANGUAGEVERSION_ES5) ? DISPATCH_METHOD : DISPATCH_PROPERTYGET; - switch(wFlags) { + case DISPATCH_METHOD|DISPATCH_PROPERTYGET: + wFlags = DISPATCH_METHOD; + /* fall through */ case DISPATCH_METHOD: case DISPATCH_CONSTRUCT: { jsval_t *argv, buf[6], r; @@ -2712,7 +2712,7 @@ HRESULT disp_call(script_ctx_t *ctx, IDispatch *disp, DISPID id, WORD flags, uns jsdisp_release(jsdisp); flags &= ~DISPATCH_JSCRIPT_INTERNAL_MASK; - if(ret && argc && (!jsdisp || ctx->version < SCRIPTLANGUAGEVERSION_ES5)) + if(ret && argc) flags |= DISPATCH_PROPERTYGET; dp.cArgs = argc; diff --git a/dlls/mshtml/dispex.c b/dlls/mshtml/dispex.c index a6b8afb0543..0af89aca630 100644 --- a/dlls/mshtml/dispex.c +++ b/dlls/mshtml/dispex.c @@ -868,7 +868,6 @@ static HRESULT dispex_value(DispatchEx *This, LCID lcid, WORD flags, DISPPARAMS return This->info->vtbl->value(This, lcid, flags, params, res, ei, caller); switch(flags) { - case DISPATCH_PROPERTYGET | DISPATCH_METHOD: case DISPATCH_PROPERTYGET: V_VT(res) = VT_BSTR; hres = dispex_to_string(This, &V_BSTR(res)); diff --git a/dlls/mshtml/mshtml_private.h b/dlls/mshtml/mshtml_private.h index ce872249e62..536d9ccbff7 100644 --- a/dlls/mshtml/mshtml_private.h +++ b/dlls/mshtml/mshtml_private.h @@ -160,6 +160,7 @@ struct constructor; XDIID(DispHTMLWindow2) \ XDIID(DispHTMLXMLHttpRequest) \ XDIID(DispXDomainRequest) \ + XDIID(DispXMLSerializer) \ XDIID(DispSVGCircleElement) \ XDIID(DispSVGSVGElement) \ XDIID(DispSVGTSpanElement) \ @@ -298,6 +299,7 @@ struct constructor; XIID(IHTMLXMLHttpRequestFactory) \ XIID(IHTMLXDomainRequest) \ XIID(IHTMLXDomainRequestFactory) \ + XIID(IXMLSerializer) \ XIID(IOmHistory) \ XIID(IOmNavigator) \ XIID(ISVGCircleElement) \ @@ -530,7 +532,8 @@ typedef struct { X(Window) \ X(XDomainRequest) \ X(XMLDocument) \ - X(XMLHttpRequest) + X(XMLHttpRequest) \ + X(XMLSerializer) typedef enum { OBJID_NONE, diff --git a/dlls/mshtml/omnavigator.c b/dlls/mshtml/omnavigator.c index fc4d2f5b408..6d26d13636d 100644 --- a/dlls/mshtml/omnavigator.c +++ b/dlls/mshtml/omnavigator.c @@ -451,6 +451,156 @@ static HRESULT init_dom_parser_ctor(struct constructor *constr) return S_OK; } +struct xml_serializer { + DispatchEx dispex; + IXMLSerializer IXMLSerializer_iface; +}; + +static inline struct xml_serializer *impl_from_IXMLSerializer(IXMLSerializer *iface) +{ + return CONTAINING_RECORD(iface, struct xml_serializer, IXMLSerializer_iface); +} + +DISPEX_IDISPATCH_IMPL(xml_serializer, IXMLSerializer, impl_from_IXMLSerializer(iface)->dispex) + +static HRESULT WINAPI xml_serializer_serializeToString(IXMLSerializer *iface, IHTMLDOMNode *node, BSTR *pString) +{ + struct xml_serializer *This = impl_from_IXMLSerializer(iface); + HTMLDOMNode *dom_node; + nsAString nsstr; + HRESULT hres; + + TRACE("(%p)->(%p %p)\n", This, node, pString); + + if(!node || !pString) + return E_INVALIDARG; + + *pString = NULL; + + dom_node = unsafe_impl_from_IHTMLDOMNode(node); + if(!dom_node) { + WARN("not an HTMLDOMNode\n"); + return E_INVALIDARG; + } + + nsAString_Init(&nsstr, NULL); + hres = nsnode_to_nsstring(dom_node->nsnode, &nsstr); + if(SUCCEEDED(hres)) { + const WCHAR *str; + nsAString_GetData(&nsstr, &str); + *pString = SysAllocString(str); + if(!*pString) + hres = E_OUTOFMEMORY; + } + nsAString_Finish(&nsstr); + + return hres; +} + +static const IXMLSerializerVtbl xml_serializer_vtbl = { + xml_serializer_QueryInterface, + xml_serializer_AddRef, + xml_serializer_Release, + xml_serializer_GetTypeInfoCount, + xml_serializer_GetTypeInfo, + xml_serializer_GetIDsOfNames, + xml_serializer_Invoke, + xml_serializer_serializeToString +}; + +static inline struct xml_serializer *xml_serializer_from_DispatchEx(DispatchEx *iface) +{ + return CONTAINING_RECORD(iface, struct xml_serializer, dispex); +} + +static void *xml_serializer_query_interface(DispatchEx *dispex, REFIID riid) +{ + struct xml_serializer *This = xml_serializer_from_DispatchEx(dispex); + + if(IsEqualGUID(&IID_IXMLSerializer, riid)) + return &This->IXMLSerializer_iface; + + return NULL; +} + +static void xml_serializer_destructor(DispatchEx *dispex) +{ + struct xml_serializer *This = xml_serializer_from_DispatchEx(dispex); + free(This); +} + +static HRESULT init_xml_serializer_ctor(struct constructor*); + +static const dispex_static_data_vtbl_t xml_serializer_dispex_vtbl = { + .query_interface = xml_serializer_query_interface, + .destructor = xml_serializer_destructor, +}; + +static const tid_t xml_serializer_iface_tids[] = { + IXMLSerializer_tid, + 0 +}; + +dispex_static_data_t XMLSerializer_dispex = { + .id = OBJID_XMLSerializer, + .init_constructor = &init_xml_serializer_ctor, + .vtbl = &xml_serializer_dispex_vtbl, + .disp_tid = DispXMLSerializer_tid, + .iface_tids = xml_serializer_iface_tids, +}; + +static HRESULT xml_serializer_ctor_value(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *params, + VARIANT *res, EXCEPINFO *ei, IServiceProvider *caller) +{ + struct constructor *This = constructor_from_DispatchEx(dispex); + struct xml_serializer *ret; + + TRACE("\n"); + + switch(flags) { + case DISPATCH_METHOD|DISPATCH_PROPERTYGET: + if(!res) + return E_INVALIDARG; + /* fall through */ + case DISPATCH_METHOD: + case DISPATCH_CONSTRUCT: + break; + default: + FIXME("flags %x not supported\n", flags); + return E_NOTIMPL; + } + + if(!(ret = calloc(1, sizeof(*ret)))) + return E_OUTOFMEMORY; + + ret->IXMLSerializer_iface.lpVtbl = &xml_serializer_vtbl; + init_dispatch(&ret->dispex, &XMLSerializer_dispex, This->window, dispex_compat_mode(&This->dispex)); + + V_VT(res) = VT_DISPATCH; + V_DISPATCH(res) = (IDispatch*)&ret->IXMLSerializer_iface; + return S_OK; +} + +static const dispex_static_data_vtbl_t xml_serializer_ctor_dispex_vtbl = { + .destructor = constructor_destructor, + .traverse = constructor_traverse, + .unlink = constructor_unlink, + .value = xml_serializer_ctor_value, +}; + +static dispex_static_data_t xml_serializer_ctor_dispex = { + .name = "XMLSerializer", + .constructor_id = OBJID_XMLSerializer, + .vtbl = &xml_serializer_ctor_dispex_vtbl, +}; + +static HRESULT init_xml_serializer_ctor(struct constructor *constr) +{ + init_dispatch(&constr->dispex, &xml_serializer_ctor_dispex, constr->window, + dispex_compat_mode(&constr->window->event_target.dispex)); + return S_OK; +} + typedef struct { DispatchEx dispex; IHTMLScreen IHTMLScreen_iface; diff --git a/dlls/mshtml/tests/dom.c b/dlls/mshtml/tests/dom.c index 58a409a7a77..3235cbbaca5 100644 --- a/dlls/mshtml/tests/dom.c +++ b/dlls/mshtml/tests/dom.c @@ -11731,83 +11731,6 @@ static void test_case_insens(IHTMLDocument2 *doc) IDispatchEx_Release(dispex); } -static void test_method_vs_getter(IHTMLDocument2 *doc) -{ - DISPPARAMS dp = { 0 }; - IDispatchEx *dispex; - DISPID dispid; - HRESULT hres; - VARIANT v; - BSTR bstr; - - hres = IHTMLDocument2_QueryInterface(doc, &IID_IDispatchEx, (void**)&dispex); - ok(hres == S_OK, "Could not get IDispatchEx: %08lx\n", hres); - - V_VT(&v) = VT_EMPTY; - hres = IDispatchEx_InvokeEx(dispex, DISPID_VALUE, LOCALE_NEUTRAL, DISPATCH_METHOD | DISPATCH_PROPERTYGET, &dp, &v, NULL, NULL); - ok(hres == S_OK, "InvokeEx failed: %08lx\n", hres); - ok(V_VT(&v) == VT_BSTR, "V_VT = %d\n", V_VT(&v)); - VariantClear(&v); - - bstr = SysAllocString(L"body"); - hres = IDispatchEx_GetDispID(dispex, bstr, 0, &dispid); - ok(hres == S_OK, "GetDispID returned: %08lx\n", hres); - SysFreeString(bstr); - - hres = IDispatchEx_InvokeEx(dispex, dispid, LOCALE_NEUTRAL, DISPATCH_METHOD | DISPATCH_PROPERTYGET, &dp, &v, NULL, NULL); - ok(hres == S_OK, "InvokeEx failed: %08lx\n", hres); - ok(V_VT(&v) == VT_DISPATCH, "V_VT = %d\n", V_VT(&v)); - ok(V_DISPATCH(&v) != NULL, "V_DISPATCH == NULL\n"); - VariantClear(&v); - - bstr = SysAllocString(L"title"); - hres = IDispatchEx_GetDispID(dispex, bstr, 0, &dispid); - ok(hres == S_OK, "GetDispID returned: %08lx\n", hres); - SysFreeString(bstr); - - hres = IDispatchEx_InvokeEx(dispex, dispid, LOCALE_NEUTRAL, DISPATCH_METHOD | DISPATCH_PROPERTYGET, &dp, &v, NULL, NULL); - ok(hres == S_OK, "InvokeEx failed: %08lx\n", hres); - ok(V_VT(&v) == VT_BSTR, "V_VT = %d\n", V_VT(&v)); - VariantClear(&v); - - bstr = SysAllocString(L"close"); - hres = IDispatchEx_GetDispID(dispex, bstr, 0, &dispid); - ok(hres == S_OK, "GetDispID returned: %08lx\n", hres); - SysFreeString(bstr); - - hres = IDispatchEx_InvokeEx(dispex, dispid, LOCALE_NEUTRAL, DISPATCH_METHOD | DISPATCH_PROPERTYGET, &dp, &v, NULL, NULL); - ok(hres == S_OK, "InvokeEx failed: %08lx\n", hres); - if(compat_mode < COMPAT_IE9) - ok(V_VT(&v) == VT_EMPTY, "V_VT = %d\n", V_VT(&v)); - else { - ok(V_VT(&v) == VT_DISPATCH, "V_VT = %d\n", V_VT(&v)); - ok(V_DISPATCH(&v) != NULL, "V_DISPATCH == NULL\n"); - } - VariantClear(&v); - - hres = IDispatchEx_InvokeEx(dispex, dispid, LOCALE_NEUTRAL, DISPATCH_PROPERTYGET, &dp, &v, NULL, NULL); - ok(hres == S_OK, "InvokeEx failed: %08lx\n", hres); - ok(V_VT(&v) == VT_DISPATCH, "V_VT = %d\n", V_VT(&v)); - ok(V_DISPATCH(&v) != NULL, "V_DISPATCH == NULL\n"); - IDispatchEx_Release(dispex); - - hres = IDispatch_QueryInterface(V_DISPATCH(&v), &IID_IDispatchEx, (void**)&dispex); - ok(hres == S_OK, "Could not get IDispatchEx: %08lx\n", hres); - VariantClear(&v); - - hres = IDispatchEx_InvokeEx(dispex, DISPID_VALUE, LOCALE_NEUTRAL, DISPATCH_METHOD | DISPATCH_PROPERTYGET, &dp, &v, NULL, NULL); - if(compat_mode < COMPAT_IE9) - todo_wine - ok(hres == E_ACCESSDENIED, "InvokeEx returned: %08lx\n", hres); - else { - ok(hres == S_OK, "InvokeEx failed: %08lx\n", hres); - ok(V_VT(&v) == VT_BSTR, "V_VT = %d\n", V_VT(&v)); - } - VariantClear(&v); - - IDispatchEx_Release(dispex); -} - static void test_null_write(IHTMLDocument2 *doc) { HRESULT hres; @@ -13919,11 +13842,9 @@ START_TEST(dom) run_domtest(doc_blank_ie8, test_quirks_mode_perf_toJSON); run_domtest(doctype_str, test_doctype); run_domtest(case_insens_str, test_case_insens); - run_domtest(doc_blank, test_method_vs_getter); if(is_ie9plus) { compat_mode = COMPAT_IE9; run_domtest(emptydiv_ie9_str, test_docfrag); - run_domtest(doc_blank_ie9, test_method_vs_getter); compat_mode = COMPAT_NONE; } diff --git a/dlls/msxml3/domdoc.c b/dlls/msxml3/domdoc.c index f35daff5f6d..17eff897110 100644 --- a/dlls/msxml3/domdoc.c +++ b/dlls/msxml3/domdoc.c @@ -486,11 +486,415 @@ static void sax_serror(void* ctx, const xmlError* err) LIBXML2_CALLBACK_SERROR(doparse, err); } +/* Check if ptr points to "<?xml" in UTF-8 or UTF-16LE format */ +static int is_xml_decl(const char *ptr, int len, int is_utf16) +{ + if (is_utf16) + { + /* UTF-16LE: each char is 2 bytes, second byte is 0 for ASCII */ + if (len < 10) return 0; + return ptr[0] == '<' && ptr[1] == 0 && + ptr[2] == '?' && ptr[3] == 0 && + ptr[4] == 'x' && ptr[5] == 0 && + ptr[6] == 'm' && ptr[7] == 0 && + ptr[8] == 'l' && ptr[9] == 0; + } + else + { + if (len < 5) return 0; + return !strncmp(ptr, "<?xml", 5); + } +} + +/* Check if char is whitespace (handles UTF-16LE) */ +static int is_ws(const char *ptr, int is_utf16) +{ + char c = ptr[0]; + if (is_utf16 && ptr[1] != 0) return 0; + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +/* Check if ptr points to "</" in UTF-8 or UTF-16LE format */ +static int is_close_tag(const char *ptr, int len, int is_utf16) +{ + if (is_utf16) + { + if (len < 4) return 0; + return ptr[0] == '<' && ptr[1] == 0 && ptr[2] == '/' && ptr[3] == 0; + } + else + { + if (len < 2) return 0; + return ptr[0] == '<' && ptr[1] == '/'; + } +} + +/* Check if ptr points to "<" followed by a letter (start tag) */ +static int is_start_tag(const char *ptr, int len, int is_utf16) +{ + char c; + if (is_utf16) + { + if (len < 4) return 0; + if (ptr[0] != '<' || ptr[1] != 0) return 0; + c = ptr[2]; + if (ptr[3] != 0) return 0; + } + else + { + if (len < 2) return 0; + if (ptr[0] != '<') return 0; + c = ptr[1]; + } + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +/* Check if ptr points to ">" */ +static int is_gt(const char *ptr, int is_utf16) +{ + if (is_utf16) + return ptr[0] == '>' && ptr[1] == 0; + return ptr[0] == '>'; +} + +/* Check if ptr points to "/>" (self-closing tag end) */ +static int is_self_close(const char *ptr, int len, int is_utf16) +{ + if (is_utf16) + { + if (len < 4) return 0; + return ptr[0] == '/' && ptr[1] == 0 && ptr[2] == '>' && ptr[3] == 0; + } + if (len < 2) return 0; + return ptr[0] == '/' && ptr[1] == '>'; +} + +/* Check if ptr points to "<!" (comment, CDATA, DOCTYPE, etc.) */ +static int is_markup_decl(const char *ptr, int len, int is_utf16) +{ + if (is_utf16) + { + if (len < 4) return 0; + return ptr[0] == '<' && ptr[1] == 0 && ptr[2] == '!' && ptr[3] == 0; + } + if (len < 2) return 0; + return ptr[0] == '<' && ptr[1] == '!'; +} + +/* Check if ptr points to "<?" (PI like <?xml) */ +static int is_pi(const char *ptr, int len, int is_utf16) +{ + if (is_utf16) + { + if (len < 4) return 0; + return ptr[0] == '<' && ptr[1] == 0 && ptr[2] == '?' && ptr[3] == 0; + } + if (len < 2) return 0; + return ptr[0] == '<' && ptr[1] == '?'; +} + +/* Check if element name ends with "XMLData" (case-sensitive). + * ptr points to first char after '<', len is remaining buffer length. + * Returns 1 if element name ends with XMLData, 0 otherwise. */ +static int is_xmldata_element(const char *ptr, int len, int is_utf16) +{ + const char *p = ptr; + const char *end = ptr + len; + const char *name_end = NULL; + int char_size = is_utf16 ? 2 : 1; + int name_len; + const char *suffix_check; + + /* Find end of element name (whitespace, >, or /) */ + while (p + char_size <= end) + { + char c = p[0]; + if (is_utf16 && p[1] != 0) { p += char_size; continue; } + if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '>' || c == '/') + { + name_end = p; + break; + } + p += char_size; + } + if (!name_end) return 0; + + name_len = (name_end - ptr) / char_size; + if (name_len < 7) return 0; /* "XMLData" is 7 chars */ + + /* Check if name ends with "XMLData" */ + suffix_check = name_end - (7 * char_size); + if (is_utf16) + { + return suffix_check[0] == 'X' && suffix_check[1] == 0 && + suffix_check[2] == 'M' && suffix_check[3] == 0 && + suffix_check[4] == 'L' && suffix_check[5] == 0 && + suffix_check[6] == 'D' && suffix_check[7] == 0 && + suffix_check[8] == 'a' && suffix_check[9] == 0 && + suffix_check[10] == 't' && suffix_check[11] == 0 && + suffix_check[12] == 'a' && suffix_check[13] == 0; + } + else + { + return !strncmp(suffix_check, "XMLData", 7); + } +} + +/* Wrap embedded XML content in CDATA so it becomes text, not parsed elements. + * Windows MSXML tolerates embedded <?xml?> declarations but libxml2 does not. + * Returns a newly allocated buffer that must be freed, or NULL if no changes needed. */ +static char *wrap_embedded_xml_in_cdata(const char *ptr, int len, int *new_len, xmlCharEncoding encoding) +{ + const char *p, *decl_start, *content_start, *content_end = NULL, *end; + char *result, *dst; + int skip_first = 0; + int is_utf16 = (encoding == XML_CHAR_ENCODING_UTF16LE); /* BE not handled - Windows uses LE */ + int char_size = is_utf16 ? 2 : 1; + int decl_size = is_utf16 ? 10 : 5; /* "<?xml" */ + int nesting; + + TRACE("len=%d encoding=%d is_utf16=%d\n", len, encoding, is_utf16); + + end = ptr + len; + + /* Check if document starts with XML declaration - if so, skip it for search */ + p = ptr; + while (p + char_size <= end && is_ws(p, is_utf16)) + p += char_size; + if (p + decl_size <= end && is_xml_decl(p, end - p, is_utf16)) + skip_first = 1; + + /* Search for embedded <?xml declarations */ + decl_start = NULL; + for (p = ptr; p + decl_size <= end; p += char_size) + { + if (is_xml_decl(p, end - p, is_utf16)) + { + if (skip_first) + { + skip_first = 0; + continue; + } + decl_start = p; + break; + } + } + + if (!decl_start) + { + /* No <?xml found - also check for *XMLData elements whose content should be wrapped. + * Pattern: <*XMLData><Element>... where Element content should remain as text. */ + const char *xmldata_start = NULL; + const char *xmldata_content = NULL; + + for (p = ptr; p + char_size <= end; p += char_size) + { + if (is_start_tag(p, end - p, is_utf16)) + { + /* Check if this element name ends with "XMLData" */ + if (is_xmldata_element(p + char_size, end - p - char_size, is_utf16)) + { + /* Found *XMLData element - find end of its start tag */ + const char *tag_end; + for (tag_end = p + char_size; tag_end + char_size <= end; tag_end += char_size) + { + if (is_gt(tag_end, is_utf16)) + { + xmldata_content = tag_end + char_size; + break; + } + if (is_self_close(tag_end, end - tag_end, is_utf16)) + break; /* Self-closing, no content */ + } + if (xmldata_content) + { + /* Check if content starts with an element (needs CDATA wrapping) */ + const char *content_check = xmldata_content; + /* Skip whitespace */ + while (content_check + char_size <= end && is_ws(content_check, is_utf16)) + content_check += char_size; + /* Check for element start that's not <? or <! */ + if (is_start_tag(content_check, end - content_check, is_utf16) && + !is_pi(content_check, end - content_check, is_utf16) && + !is_markup_decl(content_check, end - content_check, is_utf16)) + { + xmldata_start = p; + content_start = xmldata_content; + TRACE("found *XMLData element with element content\n"); + break; + } + } + xmldata_content = NULL; + } + } + } + + if (!xmldata_start) + { + TRACE("no embedded declarations found\n"); + return NULL; + } + + /* Find the matching close tag for the *XMLData element */ + nesting = 0; + for (p = content_start; p + char_size <= end; p += char_size) + { + if (is_start_tag(p, end - p, is_utf16)) + { + const char *tag_end; + int is_selfclose = 0; + for (tag_end = p + char_size; tag_end + char_size <= end; tag_end += char_size) + { + if (is_self_close(tag_end, end - tag_end, is_utf16)) + { + is_selfclose = 1; + break; + } + if (is_gt(tag_end, is_utf16)) + break; + } + if (!is_selfclose) + nesting++; + } + else if (is_close_tag(p, end - p, is_utf16)) + { + if (nesting == 0) + { + content_end = p; + break; + } + nesting--; + } + } + if (!content_end) + { + TRACE("could not find *XMLData element end\n"); + return NULL; + } + goto do_wrap; + } + + /* Find the > before the embedded declaration (end of parent start tag) */ + content_start = NULL; + for (p = decl_start - char_size; p >= ptr; p -= char_size) + { + if (is_gt(p, is_utf16)) + { + content_start = p + char_size; + break; + } + } + if (!content_start) + { + TRACE("could not find parent element start\n"); + return NULL; + } + + /* Find the matching closing tag by tracking nesting level */ + nesting = 0; /* Start at 0 - we're inside parent, looking for its close tag */ + content_end = NULL; + for (p = decl_start; p + char_size <= end; p += char_size) + { + if (is_start_tag(p, end - p, is_utf16)) + { + /* Check if this is a self-closing tag by scanning for /> or > */ + const char *tag_end; + int is_selfclose = 0; + for (tag_end = p + char_size; tag_end + char_size <= end; tag_end += char_size) + { + if (is_self_close(tag_end, end - tag_end, is_utf16)) + { + is_selfclose = 1; + break; + } + if (is_gt(tag_end, is_utf16)) + break; + } + if (!is_selfclose) + nesting++; + } + else if (is_close_tag(p, end - p, is_utf16)) + { + if (nesting == 0) + { + /* This close tag is for our parent element */ + content_end = p; + break; + } + nesting--; + } + } + if (!content_end) + { + TRACE("could not find parent element end\n"); + return NULL; + } + +do_wrap: + TRACE("wrapping content in CDATA: start=%d end=%d\n", + (int)(content_start - ptr), (int)(content_end - ptr)); + + /* Create result with CDATA wrapper: <![CDATA[ ... ]]> */ + /* Extra space: 9 chars for <![CDATA[ and 3 for ]]> = 12, doubled for UTF-16 */ + result = malloc(len + 24 * char_size + char_size); + if (!result) + return NULL; + + dst = result; + /* Copy everything up to content_start */ + for (p = ptr; p < content_start; p++) + *dst++ = *p; + /* Insert <![CDATA[ */ + if (is_utf16) + { + *dst++ = '<'; *dst++ = 0; + *dst++ = '!'; *dst++ = 0; + *dst++ = '['; *dst++ = 0; + *dst++ = 'C'; *dst++ = 0; + *dst++ = 'D'; *dst++ = 0; + *dst++ = 'A'; *dst++ = 0; + *dst++ = 'T'; *dst++ = 0; + *dst++ = 'A'; *dst++ = 0; + *dst++ = '['; *dst++ = 0; + } + else + { + memcpy(dst, "<![CDATA[", 9); + dst += 9; + } + /* Copy the content */ + for (p = content_start; p < content_end; p++) + *dst++ = *p; + /* Insert ]]> */ + if (is_utf16) + { + *dst++ = ']'; *dst++ = 0; + *dst++ = ']'; *dst++ = 0; + *dst++ = '>'; *dst++ = 0; + } + else + { + memcpy(dst, "]]>", 3); + dst += 3; + } + /* Copy the rest */ + for (p = content_end; p < end; p++) + *dst++ = *p; + + if (is_utf16) + *dst++ = 0; + *dst = '\0'; + *new_len = dst - result - (is_utf16 ? 1 : 0); + return result; +} + static xmlDocPtr doparse(domdoc* This, char const* ptr, int len, xmlCharEncoding encoding) { char *ctx_encoding; xmlDocPtr doc = NULL; xmlParserCtxtPtr pctx; + char *modified_ptr = NULL; + int modified_len; static xmlSAXHandler sax_handler = { xmlSAX2InternalSubset, /* internalSubset */ xmlSAX2IsStandalone, /* isStandalone */ @@ -526,10 +930,19 @@ static xmlDocPtr doparse(domdoc* This, char const* ptr, int len, xmlCharEncoding sax_serror /* serror */ }; + /* Wrap embedded XML declarations in CDATA - Windows MSXML tolerates these but libxml2 rejects them */ + modified_ptr = wrap_embedded_xml_in_cdata(ptr, len, &modified_len, encoding); + if (modified_ptr) + { + ptr = modified_ptr; + len = modified_len; + } + pctx = xmlCreateMemoryParserCtxt(ptr, len); if (!pctx) { ERR("Failed to create parser context\n"); + free(modified_ptr); return NULL; } @@ -556,6 +969,7 @@ static xmlDocPtr doparse(domdoc* This, char const* ptr, int len, xmlCharEncoding ctx_encoding = (char *)pctx->encoding; pctx->encoding = NULL; xmlFreeParserCtxt(pctx); + free(modified_ptr); /* TODO: put this in one of the SAX callbacks */ /* create first child as a <?xml...?> */ @@ -2467,7 +2881,9 @@ static HRESULT WINAPI domdoc_loadXML( if (This->properties->version == MSXML_DEFAULT || This->properties->version == MSXML26) while (*ptr && iswspace(*ptr)) ptr++; - xmldoc = doparse(This, (char*)ptr, lstrlenW(ptr)*sizeof(WCHAR), XML_CHAR_ENCODING_UTF16LE); + /* Handle empty string gracefully - Windows MSXML returns VARIANT_FALSE without error */ + if (*ptr) + xmldoc = doparse(This, (char*)ptr, lstrlenW(ptr)*sizeof(WCHAR), XML_CHAR_ENCODING_UTF16LE); if ( !xmldoc ) { This->error = E_FAIL; diff --git a/include/mshtmdid.h b/include/mshtmdid.h index 45b22f6349f..204e5e403f6 100644 --- a/include/mshtmdid.h +++ b/include/mshtmdid.h @@ -107,6 +107,7 @@ #define DISPID_XMLHTTPREQUEST DISPID_NORMAL_FIRST #define DISPID_XDOMAINREQUEST DISPID_NORMAL_FIRST #define DISPID_DOMPARSER DISPID_NORMAL_FIRST +#define DISPID_XMLSERIALIZER DISPID_NORMAL_FIRST #define DISPID_DOCUMENTCOMPATIBLEINFO_COLLECTION DISPID_NORMAL_FIRST #define DISPID_DOCUMENTCOMPATIBLEINFO DISPID_NORMAL_FIRST #define DISPID_XDOMAINREQUEST DISPID_NORMAL_FIRST @@ -4676,6 +4677,9 @@ /* IDOMParser */ #define DISPID_IDOMPARSER_PARSEFROMSTRING DISPID_DOMPARSER +/* IXMLSerializer */ +#define DISPID_IXMLSERIALIZER_SERIALIZETOSTRING DISPID_XMLSERIALIZER + /* IEventTarget */ #define DISPID_IEVENTTARGET_ADDEVENTLISTENER DISPID_HTMLOBJECT+10 #define DISPID_IEVENTTARGET_REMOVEEVENTLISTENER DISPID_HTMLOBJECT+11 diff --git a/include/mshtml.idl b/include/mshtml.idl index 1d2896f88b8..11f3fa5c59d 100644 --- a/include/mshtml.idl +++ b/include/mshtml.idl @@ -30286,6 +30286,46 @@ coclass DOMParser interface IDOMParser; } +/***************************************************************************** + * IXMLSerializer interface + */ +[ + object, + oleautomation, + dual, + uuid(30510783-98b5-11cf-bb82-00aa00bdce0b) +] +interface IXMLSerializer : IDispatch +{ + [id(DISPID_IXMLSERIALIZER_SERIALIZETOSTRING)] + HRESULT serializeToString([in] IHTMLDOMNode *node, [retval, out] BSTR *pString); +} + +/***************************************************************************** + * DispXMLSerializer dispinterface + */ +[ + hidden, + uuid(305900af-98b5-11cf-bb82-00aa00bdce0b) +] +dispinterface DispXMLSerializer +{ +properties: +methods: + [id(DISPID_IXMLSERIALIZER_SERIALIZETOSTRING)] + BSTR serializeToString([in] IHTMLDOMNode *node); +} + +[ + noncreatable, + uuid(30510784-98b5-11cf-bb82-00aa00bdce0b) +] +coclass XMLSerializer +{ + [default] dispinterface DispXMLSerializer; + interface IXMLSerializer; +} + /***************************************************************************** * IXMLGenericParse interface */ -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10025