Module: wine Branch: master Commit: 48fff1b93110963a5e9ca61e0bca90ff8d39db82 URL: http://source.winehq.org/git/wine.git/?a=commit;h=48fff1b93110963a5e9ca61e0bca90ff8d39db82
Author: Jacek Caban jacek@codeweavers.com Date: Tue Mar 21 16:19:32 2017 +0100
xmllite: Replace CRLF in input buffer with NL.
Signed-off-by: Jacek Caban jacek@codeweavers.com Signed-off-by: Nikolay Sivov nsivov@codeweavers.com Signed-off-by: Alexandre Julliard julliard@winehq.org
---
dlls/xmllite/reader.c | 103 ++++++++++++++++++++++++-------------------- dlls/xmllite/tests/reader.c | 8 ++-- 2 files changed, 60 insertions(+), 51 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index 478830d..4350764 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -178,6 +178,7 @@ typedef struct UINT cur; unsigned int allocated; unsigned int written; + BOOL prev_cr; } encoded_buffer;
typedef struct input_buffer input_buffer; @@ -687,6 +688,7 @@ static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer buffer->cur = 0; buffer->allocated = initial_len; buffer->written = 0; + buffer->prev_cr = FALSE;
return S_OK; } @@ -952,6 +954,34 @@ static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len) buffer->cur = 0; }
+static void fixup_buffer_cr(encoded_buffer *buffer, int off) +{ + BOOL prev_cr = buffer->prev_cr; + const WCHAR *src; + WCHAR *dest; + + src = dest = (WCHAR*)buffer->data + off; + while ((const char*)src < buffer->data + buffer->written) + { + if (*src == '\r') + { + *dest++ = '\n'; + src++; + prev_cr = TRUE; + continue; + } + if(prev_cr && *src == '\n') + src++; + else + *dest++ = *src++; + prev_cr = FALSE; + } + + buffer->written = (char*)dest - buffer->data; + buffer->prev_cr = prev_cr; + *dest = 0; +} + /* note that raw buffer content is kept */ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc) { @@ -976,15 +1006,18 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding readerinput_grow(readerinput, len); memcpy(dest->data, src->data + src->cur, len); dest->written += len*sizeof(WCHAR); - return; + } + else + { + dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0); + readerinput_grow(readerinput, dest_len); + ptr = (WCHAR*)dest->data; + MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len); + ptr[dest_len] = 0; + dest->written += dest_len*sizeof(WCHAR); }
- dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0); - readerinput_grow(readerinput, dest_len); - ptr = (WCHAR*)dest->data; - MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len); - ptr[dest_len] = 0; - dest->written += dest_len*sizeof(WCHAR); + fixup_buffer_cr(dest, 0); }
/* shrinks parsed data a buffer begins with */ @@ -1010,13 +1043,14 @@ static HRESULT reader_more(xmlreader *reader) encoded_buffer *src = &readerinput->buffer->encoded; encoded_buffer *dest = &readerinput->buffer->utf16; UINT cp = readerinput->buffer->code_page; - int len, dest_len; + int len, dest_len, prev_len; HRESULT hr; WCHAR *ptr;
/* get some raw data from stream first */ hr = readerinput_growraw(readerinput); len = readerinput_get_convlen(readerinput); + prev_len = dest->written / sizeof(WCHAR);
/* just copy for UTF-16 case */ if (cp == ~0) @@ -1024,18 +1058,20 @@ static HRESULT reader_more(xmlreader *reader) readerinput_grow(readerinput, len); memcpy(dest->data + dest->written, src->data + src->cur, len); dest->written += len*sizeof(WCHAR); - return hr; + } + else + { + dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0); + readerinput_grow(readerinput, dest_len); + ptr = (WCHAR*)(dest->data + dest->written); + MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len); + ptr[dest_len] = 0; + dest->written += dest_len*sizeof(WCHAR); + /* get rid of processed data */ + readerinput_shrinkraw(readerinput, len); }
- dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0); - readerinput_grow(readerinput, dest_len); - ptr = (WCHAR*)(dest->data + dest->written); - MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len); - ptr[dest_len] = 0; - dest->written += dest_len*sizeof(WCHAR); - /* get rid of processed data */ - readerinput_shrinkraw(readerinput, len); - + fixup_buffer_cr(dest, prev_len); return hr; }
@@ -1974,28 +2010,6 @@ static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *loc return S_OK; }
-/* Applies normalization rules to a single char, used for attribute values. - - Rules include 2 steps: - - 1) replacing \r\n with a single \n; - 2) replacing all whitespace chars with ' '. - - */ -static void reader_normalize_space(xmlreader *reader, WCHAR *ptr) -{ - encoded_buffer *buffer = &reader->input->buffer->utf16; - - if (!is_wchar_space(*ptr)) return; - - if (*ptr == '\r' && *(ptr+1) == '\n') - { - int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR); - memmove(ptr+1, ptr+2, len); - } - *ptr = ' '; -} - static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name) { static const WCHAR entltW[] = {'l','t'}; @@ -2171,7 +2185,8 @@ static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value) } else { - reader_normalize_space(reader, ptr); + /* replace all whitespace chars with ' ' */ + if (is_wchar_space(*ptr)) *ptr = ' '; reader_skipn(reader, 1); } ptr = reader_get_ptr(reader); @@ -2393,12 +2408,6 @@ static HRESULT reader_parse_cdata(xmlreader *reader) } else { - /* Value normalization is not fully implemented, rules are: - - - single '\r' -> '\n'; - - sequence '\r\n' -> '\n', in this case value length changes; - */ - if (*ptr == '\r') *ptr = '\n'; reader_skipn(reader, 1); ptr++; } diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index e91383f..c9ae1db 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -1761,8 +1761,8 @@ static void test_readvaluechunk(void) static struct test_entry cdata_tests[] = { { "<a><![CDATA[ ]]data ]]></a>", "", " ]]data ", S_OK }, { "<a><![CDATA[<![CDATA[ data ]]]]></a>", "", "<![CDATA[ data ]]", S_OK }, - { "<a><![CDATA[\n \r\n \n\n ]]></a>", "", "\n \n \n\n ", S_OK, S_OK, TRUE }, - { "<a><![CDATA[\r \r\r\n \n\n ]]></a>", "", "\n \n\n \n\n ", S_OK, S_OK, TRUE }, + { "<a><![CDATA[\n \r\n \n\n ]]></a>", "", "\n \n \n\n ", S_OK, S_OK }, + { "<a><![CDATA[\r \r\r\n \n\n ]]></a>", "", "\n \n\n \n\n ", S_OK, S_OK }, { "<a><![CDATA[\r\r \n\r 
\r \n\n ]]></a>", "", "\n\n \n\n \n \n\n ", S_OK }, { NULL } }; @@ -1856,8 +1856,8 @@ static void test_read_cdata(void) static struct test_entry text_tests[] = { { "<a>simple text</a>", "", "simple text", S_OK }, { "<a>text ]]> text</a>", "", "", WC_E_CDSECTEND }, - { "<a>\n \r\n \n\n text</a>", "", "\n \n \n\n text", S_OK, S_OK, TRUE }, - { "<a>\r \r\r\n \n\n text</a>", "", "\n \n\n \n\n text", S_OK, S_OK, TRUE }, + { "<a>\n \r\n \n\n text</a>", "", "\n \n \n\n text", S_OK, S_OK }, + { "<a>\r \r\r\n \n\n text</a>", "", "\n \n\n \n\n text", S_OK, S_OK }, { NULL } };