Module: wine Branch: master Commit: 61b46738274a7d4ad9b7bc5c5b51615feb8951bd URL: http://source.winehq.org/git/wine.git/?a=commit;h=61b46738274a7d4ad9b7bc5c5b51615feb8951bd
Author: Nikolay Sivov nsivov@codeweavers.com Date: Fri Jan 18 10:04:27 2013 +0400
xmllite: Initial support for start tag parsing.
---
dlls/xmllite/reader.c | 140 +++++++++++++++++++++++++++++++++++++++--- dlls/xmllite/tests/reader.c | 67 ++++++++++++++++++++ 2 files changed, 197 insertions(+), 10 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index c3f53b4..058875a 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -51,7 +51,8 @@ typedef enum XmlReadInState_Misc_DTD, XmlReadInState_DTD, XmlReadInState_DTD_Misc, - XmlReadInState_Element + XmlReadInState_Element, + XmlReadInState_Content } XmlReaderInternalState;
typedef enum @@ -448,11 +449,28 @@ static void readerinput_grow(xmlreaderinput *readerinput, int length) } }
-static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc) +static inline int readerinput_is_utf8(xmlreaderinput *readerinput) { - encoded_buffer *buffer = &readerinput->buffer->encoded; static char startA[] = {'<','?'}; static char commentA[] = {'<','!'}; + encoded_buffer *buffer = &readerinput->buffer->encoded; + unsigned char *ptr = (unsigned char*)buffer->data; + + return !memcmp(buffer->data, startA, sizeof(startA)) || + !memcmp(buffer->data, commentA, sizeof(commentA)) || + /* test start byte */ + (ptr[0] == '<' && + ( + (ptr[1] && (ptr[1] <= 0x7f)) || + (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */ + (buffer->data[1] >> 4) == 0xe || /* 3 bytes */ + (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */ + ); +} + +static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc) +{ + encoded_buffer *buffer = &readerinput->buffer->encoded; static WCHAR startW[] = {'<','?'}; static WCHAR commentW[] = {'<','!'}; static char utf8bom[] = {0xef,0xbb,0xbf}; @@ -464,8 +482,7 @@ static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encod
/* try start symbols if we have enough data to do that, input buffer should contain first chunk already */ - if (!memcmp(buffer->data, startA, sizeof(startA)) || - !memcmp(buffer->data, commentA, sizeof(commentA))) + if (readerinput_is_utf8(readerinput)) *enc = XmlEncoding_UTF8; else if (!memcmp(buffer->data, startW, sizeof(startW)) || !memcmp(buffer->data, commentW, sizeof(commentW))) @@ -987,9 +1004,10 @@ static inline int is_namestartchar(WCHAR ch) (ch >= 0xfdf0 && ch <= 0xfffd); }
-static inline int is_namechar(WCHAR ch) +/* [4 NS] NCName ::= Name - (Char* ':' Char*) */ +static inline int is_ncnamechar(WCHAR ch) { - return (ch == ':') || (ch >= 'A' && ch <= 'Z') || + return (ch >= 'A' && ch <= 'Z') || (ch == '_') || (ch >= 'a' && ch <= 'z') || (ch == '-') || (ch == '.') || (ch >= '0' && ch <= '9') || @@ -1011,6 +1029,11 @@ static inline int is_namechar(WCHAR ch) (ch >= 0xfdf0 && ch <= 0xfffd); }
+static inline int is_namechar(WCHAR ch) +{ + return (ch == ':') || is_ncnamechar(ch); +} + /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] @@ -1316,11 +1339,106 @@ static HRESULT reader_parse_dtd(xmlreader *reader) return S_OK; }
+/* [7 NS] QName ::= PrefixedName | UnprefixedName + [8 NS] PrefixedName ::= Prefix ':' LocalPart + [9 NS] UnprefixedName ::= LocalPart + [10 NS] Prefix ::= NCName + [11 NS] LocalPart ::= NCName */ +static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname) +{ + WCHAR *ptr, *start = reader_get_cur(reader); + + ptr = start; + if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER; + + while (is_ncnamechar(*ptr)) + { + reader_skipn(reader, 1); + ptr = reader_get_cur(reader); + } + + /* got a qualified name */ + if (*ptr == ':') + { + prefix->str = start; + prefix->len = ptr-start; + + reader_skipn(reader, 1); + start = ptr = reader_get_cur(reader); + + while (is_ncnamechar(*ptr)) + { + reader_skipn(reader, 1); + ptr = reader_get_cur(reader); + } + } + else + { + prefix->str = NULL; + prefix->len = 0; + } + + local->str = start; + local->len = ptr-start; + + if (prefix->len) + TRACE("qname %s:%s\n", debugstr_wn(prefix->str, prefix->len), debugstr_wn(local->str, local->len)); + else + TRACE("ncname %s\n", debugstr_wn(local->str, local->len)); + + qname->str = prefix->str ? prefix->str : local->str; + /* count ':' too */ + qname->len = (prefix->len ? prefix->len + 1 : 0) + local->len; + + return S_OK; +} + +/* [12 NS] STag ::= '<' QName (S Attribute)* S? '>' + [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? 
'/>' */ +static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname) +{ + static const WCHAR endW[] = {'/','>',0}; + HRESULT hr; + + /* skip '<' */ + reader_skipn(reader, 1); + + hr = reader_parse_qname(reader, prefix, local, qname); + if (FAILED(hr)) return hr; + + reader_skipspaces(reader); + + if (!reader_cmp(reader, endW)) return S_OK; + + FIXME("only empty elements without attributes supported\n"); + return E_NOTIMPL; +} + /* [39] element ::= EmptyElemTag | STag content ETag */ static HRESULT reader_parse_element(xmlreader *reader) { - FIXME("element parsing not implemented\n"); - return E_NOTIMPL; + static const WCHAR ltW[] = {'<',0}; + strval qname, prefix, local; + HRESULT hr; + + /* check if we are really on element */ + if (reader_cmp(reader, ltW)) return S_FALSE; + reader_shrink(reader); + + /* this handles empty elements too */ + hr = reader_parse_stag(reader, &prefix, &local, &qname); + if (FAILED(hr)) return hr; + + /* FIXME: need to check for defined namespace to reject invalid prefix, + currently reject all prefixes */ + if (prefix.len) return NC_E_UNDECLAREDPREFIX; + + reader->nodetype = XmlNodeType_Element; + reader_set_strvalue(reader, StringValue_LocalName, &local); + reader_set_strvalue(reader, StringValue_QualifiedName, &qname); + + FIXME("element content parsing not implemented\n"); + return hr; }
static HRESULT reader_parse_nextnode(xmlreader *reader) @@ -1389,7 +1507,9 @@ static HRESULT reader_parse_nextnode(xmlreader *reader) case XmlReadInState_Element: hr = reader_parse_element(reader); if (FAILED(hr)) return hr; - break; + + reader->instate = XmlReadInState_Content; + return hr; default: FIXME("internal state %d not handled\n", reader->instate); return E_NOTIMPL; diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index d084e7b..7394564 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -1022,6 +1022,72 @@ todo_wine { IXmlReader_Release(reader); }
+static struct test_entry element_tests[] = { + { "<a/>", "a", "", S_OK }, + { "<a />", "a", "", S_OK }, + { "<a:b/>", "a:b", "", NC_E_UNDECLAREDPREFIX }, + { "<:a/>", NULL, NULL, NC_E_QNAMECHARACTER }, + { "< a/>", NULL, NULL, NC_E_QNAMECHARACTER }, + { NULL } +}; + +static void test_read_element(void) +{ + struct test_entry *test = element_tests; + IXmlReader *reader; + HRESULT hr; + + hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL); + ok(hr == S_OK, "S_OK, got %08x\n", hr); + + while (test->xml) + { + XmlNodeType type; + IStream *stream; + + stream = create_stream_on_data(test->xml, strlen(test->xml)+1); + hr = IXmlReader_SetInput(reader, (IUnknown*)stream); + ok(hr == S_OK, "got %08x\n", hr); + + type = XmlNodeType_None; + hr = IXmlReader_Read(reader, &type); + if (test->hr_broken) + ok(hr == test->hr || broken(hr == test->hr_broken), "got %08x for %s\n", hr, test->xml); + else + ok(hr == test->hr, "got %08x for %s\n", hr, test->xml); + if (hr == S_OK) + { + const WCHAR *str; + WCHAR *str_exp; + UINT len; + + ok(type == XmlNodeType_Element, "got %d for %s\n", type, test->xml); + + len = 0; + str = NULL; + hr = IXmlReader_GetQualifiedName(reader, &str, &len); + ok(hr == S_OK, "got 0x%08x\n", hr); + ok(len == strlen(test->name), "got %u\n", len); + str_exp = a2w(test->name); + ok(!lstrcmpW(str, str_exp), "got %s\n", wine_dbgstr_w(str)); + free_str(str_exp); + + /* value */ + len = 1; + str = NULL; + hr = IXmlReader_GetValue(reader, &str, &len); + ok(hr == S_OK, "got 0x%08x\n", hr); + ok(len == 0, "got %u\n", len); + ok(*str == 0, "got %s\n", wine_dbgstr_w(str)); + } + + IStream_Release(stream); + test++; + } + + IXmlReader_Release(reader); +} + START_TEST(reader) { HRESULT r; @@ -1041,6 +1107,7 @@ START_TEST(reader) test_read_comment(); test_read_pi(); test_read_dtd(); + test_read_element(); test_read_full(); test_read_xmldeclaration();