Module: wine Branch: master Commit: a04064b8868762ebd38aecaafd5d9f1e1a14844a URL: http://source.winehq.org/git/wine.git/?a=commit;h=a04064b8868762ebd38aecaafd5d9f1e1a14844a
Author: Nikolay Sivov <nsivov@codeweavers.com> Date: Mon Aug 19 10:46:49 2013 +0400
xmllite: Improve attribute value parsing.
---
dlls/xmllite/reader.c | 94 ++++++++++++++++++++++++++++++++++++++++++- dlls/xmllite/tests/reader.c | 10 +++++ 2 files changed, 102 insertions(+), 2 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index 4dc8185..efdc6ff 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -1766,12 +1766,99 @@ static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *loc return S_OK; }
+/* Applies normalization rules to a single char, used for attribute values. + + Rules include 2 steps: + + 1) replacing \r\n with a single \n; + 2) replacing all whitespace chars with ' '. + + */ +static void reader_normalize_space(xmlreader *reader, WCHAR *ptr) +{ + encoded_buffer *buffer = &reader->input->buffer->utf16; + + if (!is_wchar_space(*ptr)) return; + + if (*ptr == '\r' && *(ptr+1) == '\n') + { + int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR); + memmove(ptr+1, ptr+2, len); + } + *ptr = ' '; +} + /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' [67] Reference ::= EntityRef | CharRef [68] EntityRef ::= '&' Name ';' */ static HRESULT reader_parse_reference(xmlreader *reader) { - FIXME("References not supported\n"); + WCHAR *start = reader_get_cur(reader), *ptr; + WCHAR ch = 0; + int len; + + /* skip '&' */ + reader_skipn(reader, 1); + ptr = reader_get_cur(reader); + + if (*ptr == '#') + { + encoded_buffer *buffer = &reader->input->buffer->utf16; + + reader_skipn(reader, 1); + ptr = reader_get_cur(reader); + + /* hex char or decimal */ + if (*ptr == 'x') + { + reader_skipn(reader, 1); + ptr = reader_get_cur(reader); + + while (*ptr != ';') + { + if ((*ptr >= '0' && *ptr <= '9')) + ch = ch*16 + *ptr - '0'; + else if ((*ptr >= 'a' && *ptr <= 'f')) + ch = ch*16 + *ptr - 'a' + 10; + else if ((*ptr >= 'A' && *ptr <= 'F')) + ch = ch*16 + *ptr - 'A' + 10; + else + return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT; + reader_skipn(reader, 1); + ptr = reader_get_cur(reader); + } + } + else + { + while (*ptr != ';') + { + if ((*ptr >= '0' && *ptr <= '9')) + { + ch = ch*10 + *ptr - '0'; + reader_skipn(reader, 1); + ptr = reader_get_cur(reader); + } + else + return ch ? 
WC_E_SEMICOLON : WC_E_DIGIT; + } + } + + if (!is_char(ch)) return WC_E_XMLCHARACTER; + + /* normalize */ + if (is_wchar_space(ch)) ch = ' '; + + len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR); + memmove(start+1, ptr+1, len); + buffer->cur = (char*)start; + + *start = ch; + + return S_OK; + } + else + FIXME("Entity references not supported\n"); + return E_NOTIMPL; }
@@ -1806,7 +1893,10 @@ static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value) if (FAILED(hr)) return hr; } else + { + reader_normalize_space(reader, ptr); reader_skipn(reader, 1); + } ptr = reader_get_cur(reader); }
@@ -1848,7 +1938,7 @@ static HRESULT reader_parse_attribute(xmlreader *reader) hr = reader_parse_attvalue(reader, &value); if (FAILED(hr)) return hr;
- TRACE("%s="%s"\n", debugstr_wn(local.str, local.len), debugstr_wn(value.str, value.len)); + TRACE("%s=%s\n", debugstr_wn(local.str, local.len), debugstr_wn(value.str, value.len)); return reader_add_attr(reader, &local, &value); }
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index 6d81482..b53c044 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -1571,6 +1571,16 @@ static struct test_entry attributes_tests[] = { { "<a attr1='a"ttrvalue'/>", "attr1", "a"ttrvalue", S_OK }, { "<a attr1=' '/>", "attr1", " ", S_OK }, { "<a attr1=" "/>", "attr1", " ", S_OK }, + { "<a attr1="\r\n \r \n \t\n\r"/>", "attr1", " ", S_OK }, + { "<a attr1=" val "/>", "attr1", " val ", S_OK }, + { "<a attr1="\r\n\tval\n"/>", "attr1", " val ", S_OK }, + { "<a attr1="val "/>", "attr1", "val ", S_OK }, + { "<a attr1="val "/>", "attr1", "val ", S_OK }, + { "<a attr1="val"/>", NULL, NULL, WC_E_XMLCHARACTER }, + { "<a attr1="val &#a;"/>", NULL, NULL, WC_E_DIGIT, WC_E_SEMICOLON }, + { "<a attr1="val a;"/>", NULL, NULL, WC_E_SEMICOLON }, + { "<a attr1="val g;"/>", NULL, NULL, WC_E_SEMICOLON }, + { "<a attr1="val &#xg;"/>", NULL, NULL, WC_E_HEXDIGIT, WC_E_SEMICOLON }, { "<a attr1=attrvalue/>", NULL, NULL, WC_E_QUOTE }, { "<a attr1="attr<value"/>", NULL, NULL, WC_E_LESSTHAN }, { NULL }