Module: wine Branch: master Commit: d693790175ac8c8d1b2ac88ac2026e9a10f3102d URL: http://source.winehq.org/git/wine.git/?a=commit;h=d693790175ac8c8d1b2ac88ac2...
Author: Nikolay Sivov nsivov@codeweavers.com Date: Sun Nov 18 18:01:46 2012 -0500
xmllite: Initial support for reader input encoding detection.
---
dlls/xmllite/reader.c | 123 ++++++++++++++++++++++++++++++++-------- dlls/xmllite/tests/reader.c | 11 +++- dlls/xmllite/xmllite_private.h | 5 ++ 3 files changed, 115 insertions(+), 24 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index ae05057..188b419 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -117,6 +117,14 @@ static inline void *m_alloc(IMalloc *imalloc, size_t len) return heap_alloc(len); }
+static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len) +{ + if (imalloc) + return IMalloc_Realloc(imalloc, mem, len); + else + return heap_realloc(mem, len); +} + static inline void m_free(IMalloc *imalloc, void *mem) { if (imalloc) @@ -142,6 +150,11 @@ static inline void *readerinput_alloc(xmlreaderinput *input, size_t len) return m_alloc(input->imalloc, len); }
+static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len) +{ + return m_realloc(input->imalloc, mem, len); +} + static inline void readerinput_free(xmlreaderinput *input, void *mem) { return m_free(input->imalloc, mem); @@ -165,7 +178,7 @@ static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer) readerinput_free(input, buffer->data); }
-static HRESULT get_code_page(xml_encoding encoding, UINT *cp) +static HRESULT get_code_page(xml_encoding encoding, xmlreaderinput *input) { const struct xml_encoding_data *data;
@@ -176,12 +189,12 @@ static HRESULT get_code_page(xml_encoding encoding, UINT *cp) }
data = &xml_encoding_map[encoding]; - *cp = data->cp; + input->buffer->code_page = data->cp;
return S_OK; }
-static HRESULT alloc_input_buffer(xmlreaderinput *input, xml_encoding encoding) +static HRESULT alloc_input_buffer(xmlreaderinput *input) { input_buffer *buffer; HRESULT hr; @@ -192,29 +205,20 @@ static HRESULT alloc_input_buffer(xmlreaderinput *input, xml_encoding encoding) if (!buffer) return E_OUTOFMEMORY;
buffer->input = input; - hr = get_code_page(encoding, &buffer->code_page); + buffer->code_page = ~0; /* code page is unknown at this point */ + hr = init_encoded_buffer(input, &buffer->utf16); if (hr != S_OK) { readerinput_free(input, buffer); return hr; }
- hr = init_encoded_buffer(input, &buffer->utf16); + hr = init_encoded_buffer(input, &buffer->encoded); if (hr != S_OK) { + free_encoded_buffer(input, &buffer->utf16); readerinput_free(input, buffer); return hr; }
- if (encoding != XmlEncoding_UTF16) { - hr = init_encoded_buffer(input, &buffer->encoded); - if (hr != S_OK) { - free_encoded_buffer(input, &buffer->utf16); - readerinput_free(input, buffer); - return hr; - } - } - else - memset(&buffer->encoded, 0, sizeof(buffer->encoded)); - input->buffer = buffer; return S_OK; } @@ -226,7 +230,7 @@ static void free_input_buffer(input_buffer *buffer) readerinput_free(buffer->input, buffer); }
-static void xmlreaderinput_release_stream(xmlreaderinput *readerinput) +static void readerinput_release_stream(xmlreaderinput *readerinput) { if (readerinput->stream) { ISequentialStream_Release(readerinput->stream); @@ -236,11 +240,11 @@ static void xmlreaderinput_release_stream(xmlreaderinput *readerinput)
/* Queries already stored interface for IStream/ISequentialStream. Interface supplied on creation will be overwritten */ -static HRESULT xmlreaderinput_query_for_stream(xmlreaderinput *readerinput) +static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput) { HRESULT hr;
- xmlreaderinput_release_stream(readerinput); + readerinput_release_stream(readerinput); hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream); if (hr != S_OK) hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream); @@ -248,6 +252,59 @@ static HRESULT xmlreaderinput_query_for_stream(xmlreaderinput *readerinput) return hr; }
+/* reads a chunk to raw buffer */ +static HRESULT readerinput_growraw(xmlreaderinput *readerinput) +{ + encoded_buffer *buffer = &readerinput->buffer->encoded; + ULONG len = buffer->allocated - buffer->written, read; + HRESULT hr; + + /* always try to get aligned to 4 bytes, so the only case we can get partialy read characters is + variable width encodings like UTF-8 */ + len = (len + 3) & ~3; + /* try to use allocated space or grow */ + if (buffer->allocated - buffer->written < len) + { + buffer->allocated *= 2; + buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated); + len = buffer->allocated - buffer->written; + } + + hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read); + if (FAILED(hr)) return hr; + TRACE("requested %d, read %d, ret 0x%08x\n", len, read, hr); + buffer->written += read; + + return hr; +} + +static xml_encoding readerinput_detectencoding(xmlreaderinput *readerinput) +{ + encoded_buffer *buffer = &readerinput->buffer->encoded; + + /* try start symbols if we have enough data to do that, input buffer should contain + first chunk already */ + if (buffer->written >= 4) + { + static char startA[] = {'<','?','x','m'}; + static WCHAR startW[] = {'<','?'}; + + if (!memcmp(buffer->data, startA, sizeof(startA))) return XmlEncoding_UTF8; + if (!memcmp(buffer->data, startW, sizeof(startW))) return XmlEncoding_UTF16; + } + + /* try with BOM now */ + if (buffer->written >= 3) + { + static char utf8bom[] = {0xef,0xbb,0xbf}; + static char utf16lebom[] = {0xff,0xfe}; + if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom))) return XmlEncoding_UTF8; + if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom))) return XmlEncoding_UTF16; + } + + return XmlEncoding_Unknown; +} + static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject) { xmlreader *This = impl_from_IXmlReader(iface); @@ -305,7 +362,7 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
if (This->input) { - xmlreaderinput_release_stream(This->input); + readerinput_release_stream(This->input); IUnknown_Release(&This->input->IXmlReaderInput_iface); This->input = NULL; } @@ -333,7 +390,7 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input) }
/* set stream for supplied IXmlReaderInput */ - hr = xmlreaderinput_query_for_stream(This->input); + hr = readerinput_query_for_stream(This->input); if (hr == S_OK) This->state = XmlReadState_Initial;
@@ -386,7 +443,27 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO
static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type) { - FIXME("(%p %p): stub\n", iface, node_type); + xmlreader *This = impl_from_IXmlReader(iface); + + FIXME("(%p)->(%p): stub\n", This, node_type); + + if (This->state == XmlReadState_Closed) return S_FALSE; + + /* if it's a first call for a new input we need to detect stream encoding */ + if (This->state == XmlReadState_Initial) + { + xml_encoding enc; + HRESULT hr; + + hr = readerinput_growraw(This->input); + if (FAILED(hr)) return hr; + + /* try to detect encoding by BOM or data and set input code page */ + enc = readerinput_detectencoding(This->input); + TRACE("detected encoding %d\n", enc); + get_code_page(enc, This->input); + } + return E_NOTIMPL; }
@@ -683,7 +760,7 @@ HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream, readerinput->stream = NULL; if (imalloc) IMalloc_AddRef(imalloc);
- hr = alloc_input_buffer(readerinput, XmlEncoding_UTF16); + hr = alloc_input_buffer(readerinput); if (hr != S_OK) { readerinput_free(readerinput, readerinput); diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index 582ad48..f5b7680 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -551,15 +551,24 @@ static void test_readerinput(void) static void test_reader_state(void) { IXmlReader *reader; + XmlNodeType nodetype; HRESULT hr;
- hr = pCreateXmlReader(&IID_IXmlReader, (LPVOID*)&reader, NULL); + hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL); ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
/* invalid arguments */ hr = IXmlReader_GetProperty(reader, XmlReaderProperty_ReadState, NULL); ok(hr == E_INVALIDARG, "Expected E_INVALIDARG, got %08x\n", hr);
+ /* attempt to read on closed reader */ + test_read_state(reader, XmlReadState_Closed, -1, 0); +if (0) +{ + /* newer versions crash here, probably cause no input was set */ + hr = IXmlReader_Read(reader, &nodetype); + ok(hr == S_FALSE, "got %08x\n", hr); +} IXmlReader_Release(reader); }
diff --git a/dlls/xmllite/xmllite_private.h b/dlls/xmllite/xmllite_private.h index 1677e5c..97993e1 100644 --- a/dlls/xmllite/xmllite_private.h +++ b/dlls/xmllite/xmllite_private.h @@ -27,6 +27,11 @@ static inline void *heap_alloc(size_t len) return HeapAlloc(GetProcessHeap(), 0, len); }
+static inline void *heap_realloc(void *mem, size_t len) +{ + return HeapReAlloc(GetProcessHeap(), 0, mem, len); +} + static inline BOOL heap_free(void *mem) { return HeapFree(GetProcessHeap(), 0, mem);