The comments in the XML declaration test suggest that it is supposed to be to checking for failure when no element follows the XML declaration. However, the error being tested for is due to a nul character in the input stream.
Create a separate test for nul-character errors, and fix the XML declaration test accordingly.
Signed-off-by: Jeff Smith whydoubt@gmail.com --- dlls/xmllite/tests/reader.c | 83 +++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 17 deletions(-)
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index 41adad1598..88b9103e1e 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -863,7 +863,6 @@ static void test_read_xmldeclaration(void) { {'s','t','a','n','d','a','l','o','n','e',0}, {'y','e','s',0} } }; IXmlReader *reader; - IStream *stream; HRESULT hr; XmlNodeType type; UINT count = 0, len, i; @@ -873,10 +872,7 @@ static void test_read_xmldeclaration(void) hr = CreateXmlReader(&IID_IXmlReader, (LPVOID*)&reader, NULL); ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
- stream = create_stream_on_data(xmldecl_full, sizeof(xmldecl_full)); - - hr = IXmlReader_SetInput(reader, (IUnknown*)stream); - ok(hr == S_OK, "Expected S_OK, got %08x\n", hr); + set_input_string(reader, xmldecl_full);
hr = IXmlReader_GetAttributeCount(reader, &count); ok(hr == S_OK, "got %08x\n", hr); @@ -968,19 +964,19 @@ static void test_read_xmldeclaration(void)
type = XmlNodeType_XmlDeclaration; hr = IXmlReader_Read(reader, &type); - /* newer versions return syntax error here cause document is incomplete, - it makes more sense than invalid char error */ + ok(hr == S_OK, "got %08x\n", hr); + ok(type == XmlNodeType_Whitespace, "expected XmlNodeType_Whitespace, got %s\n", type_to_str(type)); + + type = XmlNodeType_Whitespace; + hr = IXmlReader_Read(reader, &type); todo_wine { - ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "got 0x%08x\n", hr); + ok(hr == WC_E_ROOTELEMENT, "got %08x\n", hr); ok(type == XmlNodeType_None, "got %d\n", type); + TEST_READER_STATE(reader, XmlReadState_Error); } - IStream_Release(stream);
/* test short variant */ - stream = create_stream_on_data(xmldecl_short, sizeof(xmldecl_short)); - - hr = IXmlReader_SetInput(reader, (IUnknown *)stream); - ok(hr == S_OK, "expected S_OK, got %08x\n", hr); + set_input_string(reader, xmldecl_short);
read_node(reader, XmlNodeType_XmlDeclaration); TEST_READER_POSITION2(reader, 1, 3, ~0u, 21); @@ -1027,14 +1023,66 @@ todo_wine {
type = -1; hr = IXmlReader_Read(reader, &type); -todo_wine - ok(hr == WC_E_SYNTAX || hr == WC_E_XMLCHARACTER /* XP */, "expected WC_E_SYNTAX, got %08x\n", hr); + ok(hr == S_FALSE, "expected S_FALSE, got %08x\n", hr); ok(type == XmlNodeType_None, "expected XmlNodeType_None, got %s\n", type_to_str(type)); - TEST_READER_POSITION(reader, 1, 41); + + IXmlReader_Release(reader); +} + +static void test_read_nul(void) +{ + static const char xml_empty[] = "<a/>"; + static const char xml_ws[] = "<a/> "; + static const char xml_comment[] = "<a/><!-- comment -->"; + IXmlReader *reader; + XmlNodeType type; + IStream *stream; + HRESULT hr; + + hr = CreateXmlReader(&IID_IXmlReader, (LPVOID*)&reader, NULL); + ok(hr == S_OK, "Expected S_OK, got %08x\n", hr); + + stream = create_stream_on_data(xml_empty, sizeof(xml_empty)); + hr = IXmlReader_SetInput(reader, (IUnknown *)stream); + ok(hr == S_OK, "got %08x\n", hr); + IStream_Release(stream); + + read_node(reader, XmlNodeType_Element); + + type = -1; + hr = IXmlReader_Read(reader, &type); todo_wine - TEST_READER_STATE(reader, XmlReadState_Error); + ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "expected WC_E_SYNTAX, got 0x%08x\n", hr); + ok(type == XmlNodeType_None, "expected XmlNodeType_None, got %s\n", type_to_str(type));
+ stream = create_stream_on_data(xml_ws, sizeof(xml_ws)); + hr = IXmlReader_SetInput(reader, (IUnknown *)stream); + ok(hr == S_OK, "got %08x\n", hr); IStream_Release(stream); + + read_node(reader, XmlNodeType_Element); + + type = -1; + hr = IXmlReader_Read(reader, &type); +todo_wine { + ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "expected WC_E_SYNTAX, got 0x%08x\n", hr); + ok(type == XmlNodeType_None, "expected XmlNodeType_None, got %s\n", type_to_str(type)); +} + + stream = create_stream_on_data(xml_comment, sizeof(xml_comment)); + hr = IXmlReader_SetInput(reader, (IUnknown *)stream); + ok(hr == S_OK, "got %08x\n", hr); + IStream_Release(stream); + + read_node(reader, XmlNodeType_Element); + read_node(reader, XmlNodeType_Comment); + + type = -1; + hr = IXmlReader_Read(reader, &type); +todo_wine + ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "expected WC_E_SYNTAX, got 0x%08x\n", hr); + ok(type == XmlNodeType_None, "expected XmlNodeType_None, got %s\n", type_to_str(type)); + IXmlReader_Release(reader); }
@@ -2672,6 +2720,7 @@ START_TEST(reader) test_read_pending(); test_readvaluechunk(); test_read_xmldeclaration(); + test_read_nul(); test_reader_properties(); test_prefix(); test_namespaceuri();
Signed-off-by: Jeff Smith whydoubt@gmail.com --- dlls/xmllite/reader.c | 12 ++++++++++-- dlls/xmllite/tests/reader.c | 2 -- 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index eddc4d8eec..79e5c2253a 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -1113,8 +1113,8 @@ static inline UINT reader_get_cur(xmlreader *reader) static inline WCHAR *reader_get_ptr(xmlreader *reader) { encoded_buffer *buffer = &reader->input->buffer->utf16; - WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur; - if (!*ptr) reader_more(reader); + if (buffer->cur*sizeof(WCHAR) >= buffer->written) + reader_more(reader); return (WCHAR*)buffer->data + buffer->cur; }
@@ -1714,8 +1714,16 @@ static HRESULT reader_parse_whitespace(xmlreader *reader) { strval value; UINT start; + const encoded_buffer *buffer = &reader->input->buffer->utf16;
reader_skipspaces(reader); + + /* Do NOT return Whitespace node if followed by a character other than '<'. + * The reader_skipspaces call should have already read in the character. */ + if (buffer->cur*sizeof(WCHAR) < buffer->written && + *reader_get_ptr2(reader, buffer->cur) != '<') + return WC_E_SYNTAX; + if (is_reader_pending(reader)) return S_OK;
start = reader->resume[XmlReadResume_Body]; diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index 88b9103e1e..b02301907d 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -1064,10 +1064,8 @@ todo_wine
type = -1; hr = IXmlReader_Read(reader, &type); -todo_wine { ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "expected WC_E_SYNTAX, got 0x%08x\n", hr); ok(type == XmlNodeType_None, "expected XmlNodeType_None, got %s\n", type_to_str(type)); -}
stream = create_stream_on_data(xml_comment, sizeof(xml_comment)); hr = IXmlReader_SetInput(reader, (IUnknown *)stream);
On 12/5/19 10:53 PM, Jeff Smith wrote:
Signed-off-by: Jeff Smith whydoubt@gmail.com
dlls/xmllite/reader.c | 12 ++++++++++-- dlls/xmllite/tests/reader.c | 2 -- 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index eddc4d8eec..79e5c2253a 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -1113,8 +1113,8 @@ static inline UINT reader_get_cur(xmlreader *reader) static inline WCHAR *reader_get_ptr(xmlreader *reader) { encoded_buffer *buffer = &reader->input->buffer->utf16;
- WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
- if (!*ptr) reader_more(reader);
- if (buffer->cur*sizeof(WCHAR) >= buffer->written)
}reader_more(reader); return (WCHAR*)buffer->data + buffer->cur;
Why do you need to change that? It's used everywhere.
@@ -1714,8 +1714,16 @@ static HRESULT reader_parse_whitespace(xmlreader *reader) { strval value; UINT start;
const encoded_buffer *buffer = &reader->input->buffer->utf16; reader_skipspaces(reader);
/* Do NOT return Whitespace node if followed by a character other than '<'.
* The reader_skipspaces call should have already read in the character. */
if (buffer->cur*sizeof(WCHAR) < buffer->written &&
*reader_get_ptr2(reader, buffer->cur) != '<')
return WC_E_SYNTAX;
Buffer access should not be exposed like that.
if (is_reader_pending(reader)) return S_OK; start = reader->resume[XmlReadResume_Body];
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index 88b9103e1e..b02301907d 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -1064,10 +1064,8 @@ todo_wine
type = -1; hr = IXmlReader_Read(reader, &type);
-todo_wine { ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "expected WC_E_SYNTAX, got 0x%08x\n", hr); ok(type == XmlNodeType_None, "expected XmlNodeType_None, got %s\n", type_to_str(type)); -}
stream = create_stream_on_data(xml_comment, sizeof(xml_comment)); hr = IXmlReader_SetInput(reader, (IUnknown *)stream);
On Fri, Dec 6, 2019 at 11:13 AM Nikolay Sivov nsivov@codeweavers.com wrote:
On 12/5/19 10:53 PM, Jeff Smith wrote:
Signed-off-by: Jeff Smith whydoubt@gmail.com
dlls/xmllite/reader.c | 12 ++++++++++-- dlls/xmllite/tests/reader.c | 2 -- 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index eddc4d8eec..79e5c2253a 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -1113,8 +1113,8 @@ static inline UINT reader_get_cur(xmlreader *reader) static inline WCHAR *reader_get_ptr(xmlreader *reader) { encoded_buffer *buffer = &reader->input->buffer->utf16;
- WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
- if (!*ptr) reader_more(reader);
- if (buffer->cur*sizeof(WCHAR) >= buffer->written)
}reader_more(reader); return (WCHAR*)buffer->data + buffer->cur;
Hi Nikolay,
Why do you need to change that? It's used everywhere.
Since the test is fixed even without this, I will probably take this chunk out of this patch set.
@@ -1714,8 +1714,16 @@ static HRESULT reader_parse_whitespace(xmlreader *reader) { strval value; UINT start;
const encoded_buffer *buffer = &reader->input->buffer->utf16; reader_skipspaces(reader);
/* Do NOT return Whitespace node if followed by a character other than '<'.
* The reader_skipspaces call should have already read in the character. */
if (buffer->cur*sizeof(WCHAR) < buffer->written &&
*reader_get_ptr2(reader, buffer->cur) != '<')
return WC_E_SYNTAX;
Buffer access should not be exposed like that.
OK, I will try to improve where that is handled. Though that could potentially entail changing more things that are used everywhere.
Regards, Jeff
Signed-off-by: Jeff Smith whydoubt@gmail.com --- dlls/xmllite/reader.c | 3 ++- dlls/xmllite/tests/reader.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index 79e5c2253a..c4b0fe521d 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -2562,6 +2562,7 @@ static HRESULT reader_parse_content(xmlreader *reader)
static HRESULT reader_parse_nextnode(xmlreader *reader) { + const encoded_buffer *buffer = &reader->input->buffer->utf16; XmlNodeType nodetype = reader_get_nodetype(reader); HRESULT hr;
@@ -2662,7 +2663,7 @@ static HRESULT reader_parse_nextnode(xmlreader *reader) hr = reader_parse_misc(reader); if (hr != S_FALSE) return hr;
- if (*reader_get_ptr(reader)) + if (buffer->cur*sizeof(WCHAR) < buffer->written) { WARN("found garbage in the end of XML\n"); return WC_E_SYNTAX; diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index b02301907d..7ad548d081 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -1051,7 +1051,6 @@ static void test_read_nul(void)
type = -1; hr = IXmlReader_Read(reader, &type); -todo_wine ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "expected WC_E_SYNTAX, got 0x%08x\n", hr); ok(type == XmlNodeType_None, "expected XmlNodeType_None, got %s\n", type_to_str(type));
@@ -1077,7 +1076,6 @@ todo_wine
type = -1; hr = IXmlReader_Read(reader, &type); -todo_wine ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "expected WC_E_SYNTAX, got 0x%08x\n", hr); ok(type == XmlNodeType_None, "expected XmlNodeType_None, got %s\n", type_to_str(type));
On 12/5/19 10:53 PM, Jeff Smith wrote:
@@ -2662,7 +2663,7 @@ static HRESULT reader_parse_nextnode(xmlreader *reader) hr = reader_parse_misc(reader); if (hr != S_FALSE) return hr;
if (*reader_get_ptr(reader))
if (buffer->cur*sizeof(WCHAR) < buffer->written) { WARN("found garbage in the end of XML\n"); return WC_E_SYNTAX;
That means we don't have enough data, it's another change not backed by tests and potentially depending on current read-ahead buffer size/filled level.
On Fri, Dec 6, 2019 at 11:16 AM Nikolay Sivov nsivov@codeweavers.com wrote:
On 12/5/19 10:53 PM, Jeff Smith wrote:
@@ -2662,7 +2663,7 @@ static HRESULT reader_parse_nextnode(xmlreader *reader) hr = reader_parse_misc(reader); if (hr != S_FALSE) return hr;
if (*reader_get_ptr(reader))
if (buffer->cur*sizeof(WCHAR) < buffer->written) { WARN("found garbage in the end of XML\n"); return WC_E_SYNTAX;
Hi Nikolay,
That means we don't have enough data,
How do you figure that?
it's another change not backed by tests
This fixes two tests, and does not break any others.
and potentially depending on current read-ahead buffer size/filled level.
I'm pretty sure reader_parse_misc would have read at least one byte ahead, which is all that is required for this to trigger, though I could double-check that. However, to your point made in the patch 2 of the set about not exposing the buffer at this level, I will also consider this something that potentially needs to be handled elsewhere.
Regards, Jeff
On 12/7/19 12:24 AM, Jeff Smith wrote:
On Fri, Dec 6, 2019 at 11:16 AM Nikolay Sivov nsivov@codeweavers.com wrote:
On 12/5/19 10:53 PM, Jeff Smith wrote:
@@ -2662,7 +2663,7 @@ static HRESULT reader_parse_nextnode(xmlreader *reader) hr = reader_parse_misc(reader); if (hr != S_FALSE) return hr;
if (*reader_get_ptr(reader))
if (buffer->cur*sizeof(WCHAR) < buffer->written) { WARN("found garbage in the end of XML\n"); return WC_E_SYNTAX;
Hi Nikolay,
That means we don't have enough data,
How do you figure that?
it's another change not backed by tests
This fixes two tests, and does not break any others.
and potentially depending on current read-ahead buffer size/filled level.
I'm pretty sure reader_parse_misc would have read at least one byte ahead, which is all that is required for this to trigger, though I could double-check that. However, to your point made in the patch 2 of the set about not exposing the buffer at this level, I will also consider this something that potentially needs to be handled elsewhere.
My point is that we should always hit this single invalid syntax/garbage at the end condition that we already have, instead of doing fixups for specific node types.
Regards, Jeff
On Fri, Dec 6, 2019 at 4:19 PM Nikolay Sivov nsivov@codeweavers.com wrote:
On 12/7/19 12:24 AM, Jeff Smith wrote:
On Fri, Dec 6, 2019 at 11:16 AM Nikolay Sivov nsivov@codeweavers.com wrote:
On 12/5/19 10:53 PM, Jeff Smith wrote:
@@ -2662,7 +2663,7 @@ static HRESULT reader_parse_nextnode(xmlreader *reader) hr = reader_parse_misc(reader); if (hr != S_FALSE) return hr;
if (*reader_get_ptr(reader))
if (buffer->cur*sizeof(WCHAR) < buffer->written) { WARN("found garbage in the end of XML\n"); return WC_E_SYNTAX;
Hi Nikolay,
That means we don't have enough data,
How do you figure that?
it's another change not backed by tests
This fixes two tests, and does not break any others.
and potentially depending on current read-ahead buffer size/filled level.
I'm pretty sure reader_parse_misc would have read at least one byte ahead, which is all that is required for this to trigger, though I could double-check that. However, to your point made in the patch 2 of the set about not exposing the buffer at this level, I will also consider this something that potentially needs to be handled elsewhere.
My point is that we should always hit this single invalid syntax/garbage at the end condition that we already have,
That garbage-at-the-end condition, as it exists, is explicitly NOT triggered by a null character, but it should. So what we have here currently is not sufficient. While there may be cases that my patch does not cover, based on existing test cases, it is an improvement.
instead of doing fixups for specific node types.
On Windows, the context in which the null character is encountered is significant. For instance, if any character other than '<' is encountered at the end of a whitespace sequence, it raises the syntax error without returning a Whitespace node. So we need to catch the invalid character and interrupt before a Whitespace node is returned.