Dot segments removal in parse_canonicalize makes the assumption that the entire path part of the URI fits into the output buffer (len is the length the output URI would have taken had the output buffer been large enough). So when the output buffer is too small, remove_dot_segments will access out-of-bound.
Considering URI unescaping could be happening at the same time, there is no simple way to calculate the required output buffer size correctly without keeping the actual output URI in memory.
* * *
As you can see from the [test results](https://testbot.winehq.org/JobDetails.pl?Key=158742), this seems to be bugged on native. I don't know if we need to replicate native's behavior
From: Yuxuan Shui yshui@codeweavers.com
Dot segments removal in parse_canonicalize makes the assumption that the entire path part of the URI fits into the output buffer (len is the length the output URI would have taken had the output buffer been large enough). So when the output buffer is too small, remove_dot_segments will access out-of-bound.
Considering URI unescaping could be happening at the same time, there is no simple way to calculate the required output buffer size correctly without keeping the actual output URI in memory. --- dlls/urlmon/uri.c | 56 +++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 19 deletions(-)
diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c index cb5266518d1..46652588bfe 100644 --- a/dlls/urlmon/uri.c +++ b/dlls/urlmon/uri.c @@ -6502,9 +6502,10 @@ static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output, DWORD output_len, DWORD *result_len) { const WCHAR *ptr = NULL; - WCHAR *path = NULL; + WCHAR *write = output; const WCHAR **pptr; - DWORD len = 0; + DWORD len = 0, write_len = output_len; + int path_start = -1; BOOL reduce_path;
/* URL_UNESCAPE only has effect if none of the URL_ESCAPE flags are set. */ @@ -6529,25 +6530,42 @@ static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output, /* Keep track of the path if we need to remove dot segments from * it later. */ - if(reduce_path && !path && ptr == uri->canon_uri+uri->path_start) - path = output+len; + if(reduce_path && path_start < 0 && ptr == uri->canon_uri+uri->path_start) + path_start = len;
/* Check if it's time to reduce the path. */ if(reduce_path && ptr == uri->canon_uri+uri->path_start+uri->path_len) { - DWORD current_path_len = (output+len) - path; - DWORD new_path_len = remove_dot_segments(path, current_path_len); + DWORD current_path_len = len-path_start; + DWORD new_path_len = remove_dot_segments(write+path_start, current_path_len);
/* Update the current length. */ len -= (current_path_len-new_path_len); reduce_path = FALSE; }
+ if (len+3 >= write_len) { + /* we might write up to 3 chars in one iteration, make sure we expand at least that much */ + write_len = write_len + max(write_len, len+3-write_len); + if (write == output) { + write = malloc(write_len * sizeof(*write)); + if (!write) + return E_OUTOFMEMORY; + memcpy(write, output, len * sizeof(*write)); + } else { + void *alloc = realloc(write, write_len * sizeof(*write)); + if (!alloc) { + free(write); + return E_OUTOFMEMORY; + } + write = alloc; + } + } + if(*ptr == '%') { const WCHAR decoded = decode_pct_val(ptr); if(decoded) { if(allow_unescape && (flags & URL_UNESCAPE)) { - if(len < output_len) - output[len] = decoded; + write[len] = decoded; len++; ptr += 2; do_default_action = FALSE; @@ -6556,31 +6574,27 @@ static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output,
/* See if %'s needed to encoded. */ if(do_default_action && (flags & URL_ESCAPE_PERCENT)) { - if(len + 3 < output_len) - pct_encode_val(*ptr, output+len); + pct_encode_val(*ptr, write+len); len += 3; do_default_action = FALSE; } } else if(*ptr == ' ') { if((flags & URL_ESCAPE_SPACES_ONLY) && !(flags & URL_ESCAPE_UNSAFE)) { - if(len + 3 < output_len) - pct_encode_val(*ptr, output+len); + pct_encode_val(*ptr, write+len); len += 3; do_default_action = FALSE; } } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr)) { if(flags & URL_ESCAPE_UNSAFE) { - if(len + 3 < output_len) - pct_encode_val(*ptr, output+len); + pct_encode_val(*ptr, write+len); len += 3; do_default_action = FALSE; } }
if(do_default_action) { - if(len < output_len) - output[len] = *ptr; + write[len] = *ptr; len++; } } @@ -6588,14 +6602,18 @@ static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output, /* Sometimes the path is the very last component of the IUri, so * see if the dot segments need to be reduced now. */ - if(reduce_path && path) { - DWORD current_path_len = (output+len) - path; - DWORD new_path_len = remove_dot_segments(path, current_path_len); + if(reduce_path && path_start >= 0) { + DWORD current_path_len = len-path_start; + DWORD new_path_len = remove_dot_segments(write+path_start, current_path_len);
/* Update the current length. */ len -= (current_path_len-new_path_len); }
+ if (write != output) { + memcpy(output, write, output_len * sizeof(*output)); + free(write); + } if(len < output_len) output[len] = 0; else
From: Yuxuan Shui yshui@codeweavers.com
Native handles this poorly. When the buffer isn't large enough for URI unescaping, native will return STATUS_SUCCESS with a 0 output length.
Besides that, on <= Windows 10 v1507, there are additional cases where the returned length is 0. This appears to have been fixed in later versions.
Unsure if we need bug compatibility here. --- dlls/urlmon/tests/uri.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c index a26036f69ef..5294d4ec8c0 100644 --- a/dlls/urlmon/tests/uri.c +++ b/dlls/urlmon/tests/uri.c @@ -8050,6 +8050,7 @@ static const uri_parse_test uri_parse_tests[] = { {"zip://google.com/test<|>",0,PARSE_CANONICALIZE,0,"zip://google.com/test<|>",S_OK,FALSE}, {"http://google.com/test<|>",0,PARSE_CANONICALIZE,0,"http://google.com/test%3C%7C%3E%22,S_OK,FALSE%7D, {"http://google.com/%30%23%3F%22,0,PARSE_CANONICALIZE,URL_UNESCAPE,%22http://g..., + {"http://google.com/%30%23%3F/..%22,0,PARSE_CANONICALIZE,URL_UNESCAPE,%22http:..., {"test <|>",Uri_CREATE_ALLOW_RELATIVE,PARSE_CANONICALIZE,URL_ESCAPE_UNSAFE,"test %3C%7C%3E",S_OK,FALSE}, {"test <|>",Uri_CREATE_ALLOW_RELATIVE,PARSE_CANONICALIZE,URL_ESCAPE_SPACES_ONLY,"test%20<|>",S_OK,FALSE}, {"test%20<|>",Uri_CREATE_ALLOW_RELATIVE,PARSE_CANONICALIZE,URL_UNESCAPE|URL_ESCAPE_UNSAFE,"test%20%3C%7C%3E",S_OK,FALSE}, @@ -11517,9 +11518,21 @@ static void test_CoInternetParseIUri(void) { hr = pCreateUri(uriW, test.uri_flags, 0, &uri); ok(SUCCEEDED(hr), "Error: CreateUri returned 0x%08lx on uri_parse_tests[%ld].\n", hr, i); if(SUCCEEDED(hr)) { - WCHAR result[INTERNET_MAX_URL_LENGTH+1]; + WCHAR result[INTERNET_MAX_URL_LENGTH+1], short_result[1]; DWORD result_len = -1;
+ if (!test.expected && lstrlenA(test.property) > 1) { + /* test result_len calculation with insufficient buffer. */ + hr = pCoInternetParseIUri(uri, test.action, test.flags, short_result, 1, &result_len, 0); + ok(hr == STRSAFE_E_INSUFFICIENT_BUFFER || broken(!hr && test.action == PARSE_CANONICALIZE), + "Error: CoInternetParseIUri returned 0x%08lx, expected 0x%08lx on uri_parse_tests[%ld].\n", + hr, STRSAFE_E_INSUFFICIENT_BUFFER, i); + ok(lstrlenA(test.property) == result_len || (test.property2 && lstrlenA(test.property2) == result_len) || + broken(test.action == PARSE_CANONICALIZE && result_len == 0), + "Error: Expected %d, but got %ld instead on uri_parse_tests[%ld] - %s.\n", + lstrlenA(test.property), result_len, i, wine_dbgstr_w(uriW)); + } + hr = pCoInternetParseIUri(uri, test.action, test.flags, result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); todo_wine_if(test.todo) ok(hr == test.expected,