Surrogates have to be written in pairs. Also, handle related errors
Signed-off-by: David Kahurani <k.kahurani@gmail.com>
From: David Kahurani <k.kahurani@gmail.com>
Surrogates have to be written in pairs. Also, handle related errors
Signed-off-by: David Kahurani <k.kahurani@gmail.com>
---
 dlls/xmllite/reader.c          | 10 -------
 dlls/xmllite/tests/writer.c    | 20 +++++++++++++
 dlls/xmllite/writer.c          | 53 ++++++++++++++++++++++++++++++----
 dlls/xmllite/xmllite_private.h |  9 ++++++
 4 files changed, 76 insertions(+), 16 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index d747e36e623..f238e2af79d 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -1466,16 +1466,6 @@ static HRESULT reader_parse_comment(xmlreader *reader) return S_OK; }
-/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */ -static inline BOOL is_char(WCHAR ch) -{ - return (ch == '\t') || (ch == '\r') || (ch == '\n') || - (ch >= 0x20 && ch <= 0xd7ff) || - (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */ - (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */ - (ch >= 0xe000 && ch <= 0xfffd); -} - /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */ BOOL is_pubchar(WCHAR ch) { diff --git a/dlls/xmllite/tests/writer.c b/dlls/xmllite/tests/writer.c index c4ab079373c..1777bf0f40e 100644 --- a/dlls/xmllite/tests/writer.c +++ b/dlls/xmllite/tests/writer.c @@ -1339,6 +1339,7 @@ static void test_WriteRaw(void) IXmlWriter *writer; IStream *stream; HRESULT hr; + WCHAR surrogates[] = {0xdc00, 0xd800, '\0'};
hr = CreateXmlWriter(&IID_IXmlWriter, (void**)&writer, NULL); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); @@ -1351,6 +1352,9 @@ static void test_WriteRaw(void)
stream = writer_set_output(writer);
+ hr = IXmlWriter_WriteRaw(writer, surrogates); + ok(hr == WR_E_INVALIDSURROGATEPAIR, "Unexpected hr %#lx.\n", hr); + hr = IXmlWriter_WriteRaw(writer, NULL); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
@@ -1888,6 +1892,7 @@ static void test_WriteString(void) IXmlWriter *writer; IStream *stream; HRESULT hr; + WCHAR surrogates[] = {0xd800, 0xdc00, 'x', 'y', '\0'};
hr = CreateXmlWriter(&IID_IXmlWriter, (void**)&writer, NULL); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); @@ -1905,6 +1910,21 @@ static void test_WriteString(void)
stream = writer_set_output(writer);
+ hr = IXmlWriter_WriteStartElement(writer, NULL, L"sub", NULL); + ok(hr == S_OK, "Unexpected hr #%lx.\n", hr); + + hr = IXmlWriter_WriteString(writer, surrogates); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + hr = IXmlWriter_Flush(writer); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + + CHECK_OUTPUT(stream, + "<sub>\U00010000xy"); + IStream_Release(stream); + + stream = writer_set_output(writer); + hr = IXmlWriter_WriteStartElement(writer, NULL, L"b", NULL); ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
diff --git a/dlls/xmllite/writer.c b/dlls/xmllite/writer.c index 065716f7e8c..f9c2b98fecb 100644 --- a/dlls/xmllite/writer.c +++ b/dlls/xmllite/writer.c @@ -163,6 +163,16 @@ static inline void *writer_alloc(const xmlwriter *writer, size_t len) return m_alloc(writer->imalloc, len); }
+static BOOL is_high_surrogate(WCHAR ch) +{ + return ch >= 0xd800 && ch <= 0xdbff; +} + +static BOOL is_low_surrogate(WCHAR ch) +{ + return ch >= 0xdc00 && ch <= 0xdfff; +} + static inline void writer_free(const xmlwriter *writer, void *mem) { m_free(writer->imalloc, mem); @@ -1684,6 +1694,28 @@ static HRESULT WINAPI xmlwriter_WriteQualifiedName(IXmlWriter *iface, LPCWSTR pw return E_NOTIMPL; }
+static HRESULT write_raw_char(xmlwriteroutput *output, LPCWSTR *context) +{ + const WCHAR *data = *context; + + if (!is_char(*data)) return WC_E_XMLCHARACTER; + + if (is_high_surrogate(*data)) + { + if (!*(data + 1) || !is_low_surrogate(*(data + 1))) + return WR_E_INVALIDSURROGATEPAIR; + + write_output_buffer(output, data, 2); + (*context)++; + } + else if (is_low_surrogate(*data)) + return WR_E_INVALIDSURROGATEPAIR; + else + write_output_buffer(output, data, 1); + + return S_OK; +} + static HRESULT WINAPI xmlwriter_WriteRaw(IXmlWriter *iface, LPCWSTR data) { xmlwriter *This = impl_from_IXmlWriter(iface); @@ -1713,7 +1745,13 @@ static HRESULT WINAPI xmlwriter_WriteRaw(IXmlWriter *iface, LPCWSTR data) return WR_E_INVALIDACTION; }
- write_output_buffer(This->output, data, -1); + while (*data) + { + HRESULT hr = write_raw_char(This->output, &data); + if (FAILED(hr)) return hr; + data++; + } + return S_OK; }
@@ -1834,7 +1872,7 @@ static HRESULT WINAPI xmlwriter_WriteStartElement(IXmlWriter *iface, LPCWSTR pre return S_OK; }
-static void write_escaped_string(xmlwriter *writer, const WCHAR *string) +static HRESULT write_escaped_string(xmlwriter *writer, const WCHAR *string) { while (*string) { @@ -1850,11 +1888,15 @@ static void write_escaped_string(xmlwriter *writer, const WCHAR *string) write_output_buffer(writer->output, L">", 4); break; default: - write_output_buffer(writer->output, string, 1); + { + HRESULT hr = write_raw_char(writer->output, &string); + if (FAILED(hr)) return hr; + } } - string++; } + + return S_OK; }
static HRESULT WINAPI xmlwriter_WriteString(IXmlWriter *iface, const WCHAR *string) @@ -1884,8 +1926,7 @@ static HRESULT WINAPI xmlwriter_WriteString(IXmlWriter *iface, const WCHAR *stri }
This->textnode = 1; - write_escaped_string(This, string); - return S_OK; + return write_escaped_string(This, string); }
static HRESULT WINAPI xmlwriter_WriteSurrogateCharEntity(IXmlWriter *iface, WCHAR wchLow, WCHAR wchHigh) diff --git a/dlls/xmllite/xmllite_private.h b/dlls/xmllite/xmllite_private.h index bd53fca575f..45993d20567 100644 --- a/dlls/xmllite/xmllite_private.h +++ b/dlls/xmllite/xmllite_private.h @@ -68,4 +68,13 @@ static inline BOOL is_wchar_space(WCHAR ch) return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; }
+/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */ +static inline BOOL is_char(WCHAR ch) +{ + return (ch == '\t') || (ch == '\r') || (ch == '\n') || + (ch >= 0x20 && ch <= 0xd7ff) || + (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */ + (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */ + (ch >= 0xe000 && ch <= 0xfffd); +} #endif /* __XMLLITE_PRIVATE__ */
Nikolay Sivov (@nsivov) commented about dlls/xmllite/writer.c:
return E_NOTIMPL;
}
+static HRESULT write_raw_char(xmlwriteroutput *output, LPCWSTR *context)
If this is going to be used for WriteChars() as well, we'll probably need to pass length/-1.
Nikolay Sivov (@nsivov) commented about dlls/xmllite/writer.c:
return E_NOTIMPL;
}
+static HRESULT write_raw_char(xmlwriteroutput *output, LPCWSTR *context)
+{
+    const WCHAR *data = *context;
+
+    if (!is_char(*data)) return WC_E_XMLCHARACTER;
+
+    if (is_high_surrogate(*data))
+    {
+        if (!*(data + 1) || !is_low_surrogate(*(data + 1)))
Isn't the first condition redundant?
Nikolay Sivov (@nsivov) commented about dlls/xmllite/writer.c:
return E_NOTIMPL;
}
+static HRESULT write_raw_char(xmlwriteroutput *output, LPCWSTR *context)
+{
+    const WCHAR *data = *context;
+
+    if (!is_char(*data)) return WC_E_XMLCHARACTER;
+
+    if (is_high_surrogate(*data))
+    {
+        if (!*(data + 1) || !is_low_surrogate(*(data + 1)))
+            return WR_E_INVALIDSURROGATEPAIR;
+
+        write_output_buffer(output, data, 2);
+        (*context)++;
This increment has to be moved to one place - either this helper advances the pointer always, or it's advanced by the caller.
On Fri Nov 11 12:35:49 2022 +0000, Nikolay Sivov wrote:
Isn't the first condition redundant?
Yes, you're right; a null terminator will be caught anyway, since it is not a low surrogate.
On Fri Nov 11 12:35:24 2022 +0000, Nikolay Sivov wrote:
If this is going to be used for WriteChars() as well, we'll probably need to pass length/-1.
Yes, WriteChars will need a similar helper, with the simple modification of passing the length. However, I thought I would have to write it as an entirely separate function: if we used the same helper for all four functions, we would have to call lstrlenW on the input whenever the user wants to write the whole string (as is the case with WriteString/WriteRaw), which AFAIK you're against.
On Fri Nov 11 12:36:49 2022 +0000, Nikolay Sivov wrote:
This increment has to be moved to one place - either this helper advances the pointer always, or it's advanced by the caller.
The issue is that when we write a surrogate pair we have to advance the pointer by two, whereas for a normal character it is advanced by only one, so I'm not sure this can be done in one place.