Surrogates have to be written in pairs. Also, handle related errors.
Signed-off-by: David Kahurani <k.kahurani@gmail.com>
-- v4: xmllite/writer: Handle surrogate pairs
From: David Kahurani <k.kahurani@gmail.com>
Surrogates have to be written in pairs. Also, handle related errors.
Signed-off-by: David Kahurani <k.kahurani@gmail.com> --- dlls/xmllite/reader.c | 10 ------ dlls/xmllite/tests/writer.c | 20 ++++++++++++ dlls/xmllite/writer.c | 59 ++++++++++++++++++++++++++++++---- dlls/xmllite/xmllite_private.h | 9 ++++++ 4 files changed, 81 insertions(+), 17 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index d747e36e623..f238e2af79d 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -1466,16 +1466,6 @@ static HRESULT reader_parse_comment(xmlreader *reader) return S_OK; }
-/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */ -static inline BOOL is_char(WCHAR ch) -{ - return (ch == '\t') || (ch == '\r') || (ch == '\n') || - (ch >= 0x20 && ch <= 0xd7ff) || - (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */ - (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */ - (ch >= 0xe000 && ch <= 0xfffd); -} - /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */ BOOL is_pubchar(WCHAR ch) { diff --git a/dlls/xmllite/tests/writer.c b/dlls/xmllite/tests/writer.c index c4ab079373c..deb8efa98d8 100644 --- a/dlls/xmllite/tests/writer.c +++ b/dlls/xmllite/tests/writer.c @@ -1339,6 +1339,7 @@ static void test_WriteRaw(void) IXmlWriter *writer; IStream *stream; HRESULT hr; + static const WCHAR surrogates[] = {0xdc00, 0xd800, '\0'};
hr = CreateXmlWriter(&IID_IXmlWriter, (void**)&writer, NULL); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); @@ -1351,6 +1352,9 @@ static void test_WriteRaw(void)
stream = writer_set_output(writer);
+ hr = IXmlWriter_WriteRaw(writer, surrogates); + ok(hr == WR_E_INVALIDSURROGATEPAIR, "Unexpected hr %#lx.\n", hr); + hr = IXmlWriter_WriteRaw(writer, NULL); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
@@ -1888,6 +1892,7 @@ static void test_WriteString(void) IXmlWriter *writer; IStream *stream; HRESULT hr; + static const WCHAR surrogates[] = {0xd800, 0xdc00, 'x', 'y', '\0'};
hr = CreateXmlWriter(&IID_IXmlWriter, (void**)&writer, NULL); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); @@ -1905,6 +1910,21 @@ static void test_WriteString(void)
stream = writer_set_output(writer);
+ hr = IXmlWriter_WriteStartElement(writer, NULL, L"sub", NULL); + ok(hr == S_OK, "Unexpected hr #%lx.\n", hr); + + hr = IXmlWriter_WriteString(writer, surrogates); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXmlWriter_Flush(writer); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + CHECK_OUTPUT(stream, + "<sub>\U00010000xy"); + IStream_Release(stream); + + stream = writer_set_output(writer); + hr = IXmlWriter_WriteStartElement(writer, NULL, L"b", NULL); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
diff --git a/dlls/xmllite/writer.c b/dlls/xmllite/writer.c index 065716f7e8c..a1aaec974cc 100644 --- a/dlls/xmllite/writer.c +++ b/dlls/xmllite/writer.c @@ -163,6 +163,16 @@ static inline void *writer_alloc(const xmlwriter *writer, size_t len) return m_alloc(writer->imalloc, len); }
+static BOOL is_high_surrogate(WCHAR ch) +{ + return ch >= 0xd800 && ch <= 0xdbff; +} + +static BOOL is_low_surrogate(WCHAR ch) +{ + return ch >= 0xdc00 && ch <= 0xdfff; +} + static inline void writer_free(const xmlwriter *writer, void *mem) { m_free(writer->imalloc, mem); @@ -1684,6 +1694,31 @@ static HRESULT WINAPI xmlwriter_WriteQualifiedName(IXmlWriter *iface, LPCWSTR pw return E_NOTIMPL; }
+static HRESULT write_raw_char(xmlwriteroutput *output, LPCWSTR *context) +{ + const WCHAR *data = *context; + + if (!is_char(*data)) return WC_E_XMLCHARACTER; + + if (is_high_surrogate(*data)) + { + if (!is_low_surrogate(*(data + 1))) + return WR_E_INVALIDSURROGATEPAIR; + + write_output_buffer(output, data, 2); + (*context) += 2; + } + else if (is_low_surrogate(*data)) + return WR_E_INVALIDSURROGATEPAIR; + else + { + write_output_buffer(output, data, 1); + (*context)++; + } + + return S_OK; +} + static HRESULT WINAPI xmlwriter_WriteRaw(IXmlWriter *iface, LPCWSTR data) { xmlwriter *This = impl_from_IXmlWriter(iface); @@ -1713,7 +1748,12 @@ static HRESULT WINAPI xmlwriter_WriteRaw(IXmlWriter *iface, LPCWSTR data) return WR_E_INVALIDACTION; }
- write_output_buffer(This->output, data, -1); + while (*data) + { + HRESULT hr = write_raw_char(This->output, &data); + if (FAILED(hr)) return hr; + } + return S_OK; }
@@ -1834,7 +1874,7 @@ static HRESULT WINAPI xmlwriter_WriteStartElement(IXmlWriter *iface, LPCWSTR pre return S_OK; }
-static void write_escaped_string(xmlwriter *writer, const WCHAR *string) +static HRESULT write_escaped_string(xmlwriter *writer, const WCHAR *string) { while (*string) { @@ -1842,19 +1882,25 @@ static void write_escaped_string(xmlwriter *writer, const WCHAR *string) { case '<': write_output_buffer(writer->output, L"&lt;", 4); + string++; break; case '&': write_output_buffer(writer->output, L"&amp;", 5); + string++; break; case '>': write_output_buffer(writer->output, L"&gt;", 4); + string++; break; default: - write_output_buffer(writer->output, string, 1); + { + HRESULT hr = write_raw_char(writer->output, &string); + if (FAILED(hr)) return hr; + } } - - string++; } + + return S_OK; }
static HRESULT WINAPI xmlwriter_WriteString(IXmlWriter *iface, const WCHAR *string) @@ -1884,8 +1930,7 @@ static HRESULT WINAPI xmlwriter_WriteString(IXmlWriter *iface, const WCHAR *stri }
This->textnode = 1; - write_escaped_string(This, string); - return S_OK; + return write_escaped_string(This, string); }
static HRESULT WINAPI xmlwriter_WriteSurrogateCharEntity(IXmlWriter *iface, WCHAR wchLow, WCHAR wchHigh) diff --git a/dlls/xmllite/xmllite_private.h b/dlls/xmllite/xmllite_private.h index bd53fca575f..45993d20567 100644 --- a/dlls/xmllite/xmllite_private.h +++ b/dlls/xmllite/xmllite_private.h @@ -68,4 +68,13 @@ static inline BOOL is_wchar_space(WCHAR ch) return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; }
+/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */ +static inline BOOL is_char(WCHAR ch) +{ + return (ch == '\t') || (ch == '\r') || (ch == '\n') || + (ch >= 0x20 && ch <= 0xd7ff) || + (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */ + (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */ + (ch >= 0xe000 && ch <= 0xfffd); +} #endif /* __XMLLITE_PRIVATE__ */
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=126153
Your paranoid android.
=== debian11 (32 bit report) ===
crypt32: cert.c:4191: Test failed: success cert.c:4192: Test failed: got 00000000 cert.c:4193: Test failed: got 00000000
Nikolay Sivov (@nsivov) commented about dlls/xmllite/writer.c:
return E_NOTIMPL;
}
+static HRESULT write_raw_char(xmlwriteroutput *output, LPCWSTR *context) +{
- const WCHAR *data = *context;
- if (!is_char(*data)) return WC_E_XMLCHARACTER;
Do we have any tests for the is_char() check?
Nikolay Sivov (@nsivov) commented about dlls/xmllite/writer.c:
return m_alloc(writer->imalloc, len);
}
+static BOOL is_high_surrogate(WCHAR ch) +{
- return ch >= 0xd800 && ch <= 0xdbff;
+}
+static BOOL is_low_surrogate(WCHAR ch) +{
- return ch >= 0xdc00 && ch <= 0xdfff;
+}
There is an option to use some macros for this, from winnls.h.
Nikolay Sivov (@nsivov) commented about dlls/xmllite/writer.c:
string++; break; case '&': write_output_buffer(writer->output, L"&amp;", 5);
string++; break; case '>': write_output_buffer(writer->output, L"&gt;", 4);
string++; break; default:
write_output_buffer(writer->output, string, 1);
{
HRESULT hr = write_raw_char(writer->output, &string);
if (FAILED(hr)) return hr;
}
Now that we are doing error checks, we can consolidate them and use the return values from the write_output_buffer() calls as well.