This fixes an issue when the path includes non-ASCII characters.
Signed-off-by: Jactry Zeng <jzeng@codeweavers.com>
-- v10: mshtml: Call UrlUnescapeW() with URL_UNESCAPE_AS_UTF8 in is_gecko_path(). shlwapi/tests: Test UrlUnescapeW() with URL_UNESCAPE_AS_UTF8. kernelbase: Implement URL_UNESCAPE_AS_UTF8 for UrlUnescapeW().
From: Jactry Zeng <jzeng@codeweavers.com>
--- dlls/shlwapi/tests/url.c | 71 ++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 32 deletions(-)
diff --git a/dlls/shlwapi/tests/url.c b/dlls/shlwapi/tests/url.c index 6870de4ee5f..3690bc1686c 100644 --- a/dlls/shlwapi/tests/url.c +++ b/dlls/shlwapi/tests/url.c @@ -407,7 +407,19 @@ static struct { {"file://%24%25foobar", "file://$%foobar"} };
-/* ################ */ +static struct +{ + const WCHAR *url; + const WCHAR *expect; + DWORD flags; +} TEST_URL_UNESCAPEW[] = +{ + { L"file://foo/bar", L"file://foo/bar" }, + { L"file://fo%20o%5Ca/bar", L"file://fo o\a/bar" }, + { L"file://%24%25foobar", L"file://$%foobar" }, + { L"file:///C:/Program Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program%20Files", L"file:///C:/Program Files" }, +};
static const struct { const char *path; @@ -1391,17 +1403,14 @@ static void test_UrlIs(void)
static void test_UrlUnescape(void) { + WCHAR urlW[INTERNET_MAX_URL_LENGTH], bufferW[INTERNET_MAX_URL_LENGTH]; CHAR szReturnUrl[INTERNET_MAX_URL_LENGTH]; - WCHAR ret_urlW[INTERNET_MAX_URL_LENGTH]; - WCHAR *urlW, *expected_urlW; - DWORD dwEscaped; - size_t i; + DWORD dwEscaped, unescaped; static char inplace[] = "file:///C:/Program%20Files"; static char another_inplace[] = "file:///C:/Program%20Files"; static const char expected[] = "file:///C:/Program Files"; - static WCHAR inplaceW[] = L"file:///C:/Program Files"; - static WCHAR another_inplaceW[] = L"file:///C:/Program%20Files"; HRESULT res; + int i;
for (i = 0; i < ARRAY_SIZE(TEST_URL_UNESCAPE); i++) { dwEscaped=INTERNET_MAX_URL_LENGTH; @@ -1418,21 +1427,30 @@ static void test_UrlUnescape(void) "UrlUnescapeA returned 0x%lx (expected E_INVALIDARG) for "%s"\n", res, TEST_URL_UNESCAPE[i].url); ok(strcmp(szReturnUrl,"")==0, "Expected empty string\n"); + }
- dwEscaped = INTERNET_MAX_URL_LENGTH; - urlW = GetWideString(TEST_URL_UNESCAPE[i].url); - expected_urlW = GetWideString(TEST_URL_UNESCAPE[i].expect); - res = UrlUnescapeW(urlW, ret_urlW, &dwEscaped, 0); - ok(res == S_OK, - "UrlUnescapeW returned 0x%lx (expected S_OK) for "%s"\n", - res, TEST_URL_UNESCAPE[i].url); - - WideCharToMultiByte(CP_ACP,0,ret_urlW,-1,szReturnUrl,INTERNET_MAX_URL_LENGTH,0,0); - ok(lstrcmpW(ret_urlW, expected_urlW)==0, - "Expected "%s", but got "%s" from "%s" flags %08lx\n", - TEST_URL_UNESCAPE[i].expect, szReturnUrl, TEST_URL_UNESCAPE[i].url, 0L); - FreeWideString(urlW); - FreeWideString(expected_urlW); + for (i = 0; i < ARRAYSIZE(TEST_URL_UNESCAPEW); i++) + { + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url); + + memset(bufferW, 0xff, sizeof(bufferW)); + unescaped = INTERNET_MAX_URL_LENGTH; + res = UrlUnescapeW(urlW, bufferW, &unescaped, TEST_URL_UNESCAPEW[i].flags); + ok(res == S_OK, "[%d]: returned %#lx.\n", i, res); + ok(unescaped == wcslen(TEST_URL_UNESCAPEW[i].expect), "[%d]: got unescaped %ld.\n", i, unescaped); + ok(!wcscmp(bufferW, TEST_URL_UNESCAPEW[i].expect), "[%d]: got result %s.\n", i, debugstr_w(bufferW)); + + /* Test with URL_UNESCAPE_INPLACE */ + unescaped = INTERNET_MAX_URL_LENGTH; + res = UrlUnescapeW(urlW, NULL, &unescaped, TEST_URL_UNESCAPEW[i].flags | URL_UNESCAPE_INPLACE); + ok(res == S_OK, "[%d]: returned %#lx.\n", i, res); + ok(unescaped == INTERNET_MAX_URL_LENGTH, "[%d]: got unescaped %ld.\n", i, unescaped); + ok(!wcscmp(urlW, TEST_URL_UNESCAPEW[i].expect), "[%d]: got result %s.\n", i, debugstr_w(urlW)); + + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url); + unescaped = wcslen(TEST_URL_UNESCAPEW[i].expect) - 1; + res = UrlUnescapeW(urlW, bufferW, &unescaped, TEST_URL_UNESCAPEW[i].flags); + ok(res == E_POINTER, "[%d]: returned %#lx.\n", i, res); }
dwEscaped = sizeof(inplace); @@ -1445,17 +1463,6 @@ static void test_UrlUnescape(void) res = UrlUnescapeA(another_inplace, NULL, NULL, URL_UNESCAPE_INPLACE); ok(res == S_OK, "UrlUnescapeA returned 0x%lx (expected S_OK)\n", res); ok(!strcmp(another_inplace, expected), "got %s expected %s\n", another_inplace, expected); - - dwEscaped = sizeof(inplaceW); - res = UrlUnescapeW(inplaceW, NULL, &dwEscaped, URL_UNESCAPE_INPLACE); - ok(res == S_OK, "UrlUnescapeW returned 0x%lx (expected S_OK)\n", res); - ok(dwEscaped == 50, "got %ld expected 50\n", dwEscaped); - - /* if we set the buffer pointer to NULL, the string apparently still gets converted (Google Lively does this) */ - res = UrlUnescapeW(another_inplaceW, NULL, NULL, URL_UNESCAPE_INPLACE); - ok(res == S_OK, "UrlUnescapeW returned 0x%lx (expected S_OK)\n", res); - - ok(lstrlenW(another_inplaceW) == 24, "got %d expected 24\n", lstrlenW(another_inplaceW)); }
static const struct parse_url_test_t {
From: Jactry Zeng <jzeng@codeweavers.com>
--- dlls/kernelbase/path.c | 58 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-)
diff --git a/dlls/kernelbase/path.c b/dlls/kernelbase/path.c index 7eda9bd483c..40ddf840045 100644 --- a/dlls/kernelbase/path.c +++ b/dlls/kernelbase/path.c @@ -2907,11 +2907,26 @@ HRESULT WINAPI UrlUnescapeA(char *url, char *unescaped, DWORD *unescaped_len, DW return hr; }
+static int get_utf8_len(unsigned char code) +{ + if (code < 0x80) + return 1; + else if ((code & 0xe0) == 0xc0) + return 2; + else if ((code & 0xf0) == 0xe0) + return 3; + else if ((code & 0xf8) == 0xf0) + return 4; + return 0; +} + HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len, DWORD flags) { + WCHAR *dst, next, utf16_buf[4]; BOOL stop_unescaping = FALSE; + int utf8_len, utf16_len, i; const WCHAR *src; - WCHAR *dst, next; + char utf8_buf[4]; DWORD needed; HRESULT hr;
@@ -2930,6 +2945,7 @@ HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len,
for (src = url, needed = 0; *src; src++, needed++) { + utf16_len = 0; if (flags & URL_DONT_UNESCAPE_EXTRA_INFO && (*src == '#' || *src == '?')) { stop_unescaping = TRUE; @@ -2939,17 +2955,53 @@ HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len, { INT ih; WCHAR buf[5] = L"0x"; + memcpy(buf + 2, src + 1, 2*sizeof(WCHAR)); buf[4] = 0; StrToIntExW(buf, STIF_SUPPORT_HEX, &ih); - next = (WCHAR) ih; src += 2; /* Advance to end of escape */ + + if (flags & URL_UNESCAPE_AS_UTF8) + { + utf8_buf[0] = ih; + utf8_len = get_utf8_len(ih); + for (i = 1; i < utf8_len && *(src + 1) == '%' && *(src + 2) && *(src + 3); i++) + { + memcpy(buf + 2, src + 2, 2 * sizeof(WCHAR)); + StrToIntExW(buf, STIF_SUPPORT_HEX, &ih); + /* Check if it is a valid continuation byte. */ + if ((ih & 0xc0) == 0x80) + { + utf8_buf[i] = ih; + src += 3; + } + else + break; + } + + utf16_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8_buf, i, utf16_buf, ARRAYSIZE(utf16_buf)); + if (utf16_len) + needed += utf16_len - 1; + else + next = 0xfffd; + } + else + next = (WCHAR) ih; } else next = *src;
if (flags & URL_UNESCAPE_INPLACE || needed < *unescaped_len) - *dst++ = next; + { + if (utf16_len) + { + memcpy(dst, utf16_buf, utf16_len * sizeof(*utf16_buf)); + dst += utf16_len; + } + else + *dst++ = next; + } }
if (flags & URL_UNESCAPE_INPLACE || needed < *unescaped_len)
From: Jactry Zeng <jzeng@codeweavers.com>
--- dlls/shlwapi/tests/url.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/dlls/shlwapi/tests/url.c b/dlls/shlwapi/tests/url.c index 3690bc1686c..160f7696cc2 100644 --- a/dlls/shlwapi/tests/url.c +++ b/dlls/shlwapi/tests/url.c @@ -28,6 +28,7 @@ #include "shlwapi.h" #include "wininet.h" #include "intshcut.h" +#include "winternl.h"
static const char* TEST_URL_1 = "http://www.winehq.org/tests?date=10/10/1923"; static const char* TEST_URL_2 = "http://localhost:8080/tests%2e.html?date=Mon%2010/10/1923"; @@ -418,7 +419,20 @@ static struct { L"file://fo%20o%5Ca/bar", L"file://fo o\a/bar" }, { L"file://%24%25foobar", L"file://$%foobar" }, { L"file:///C:/Program Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program Files", L"file:///C:/Program Files", URL_UNESCAPE_AS_UTF8 }, { L"file:///C:/Program%20Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program%20Files", L"file:///C:/Program Files", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4%B8%AD%E6%96%87/bar", L"file://foo/\xe4\xb8\xad\xe6\x96\x87/bar" }, /* with 3 btyes utf-8 */ + { L"file://foo/%E4%B8%AD%E6%96%87/bar", L"file://foo/\x4e2d\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + /* mix corrupt and good utf-8 */ + { L"file://foo/%E4%AD%E6%96%87/bar", L"file://foo/\xfffd\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%F0%9F%8D%B7/bar", L"file://foo/\xf0\x9f\x8d\xb7/bar" }, /* with 4 btyes utf-8 */ + { L"file://foo/%F0%9F%8D%B7/bar", L"file://foo/\xd83c\xdf77/bar", URL_UNESCAPE_AS_UTF8 }, + /* non-escaped chars between multi-byte escaped chars */ + { L"file://foo/%E4%B8%ADabc%E6%96%87/bar", L"file://foo/\x4e2d""abc""\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4B8%AD/bar", L"file://foo/\xfffd""B8\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4%G8%AD/bar", L"file://foo/\xfffd""%G8\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%G4%B8%AD/bar", L"file://foo/%G4\xfffd\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, };
static const struct { @@ -1406,6 +1420,7 @@ static void test_UrlUnescape(void) WCHAR urlW[INTERNET_MAX_URL_LENGTH], bufferW[INTERNET_MAX_URL_LENGTH]; CHAR szReturnUrl[INTERNET_MAX_URL_LENGTH]; DWORD dwEscaped, unescaped; + BOOL utf8_support = TRUE; static char inplace[] = "file:///C:/Program%20Files"; static char another_inplace[] = "file:///C:/Program%20Files"; static const char expected[] = "file:///C:/Program Files"; @@ -1429,8 +1444,21 @@ static void test_UrlUnescape(void) ok(strcmp(szReturnUrl,"")==0, "Expected empty string\n"); }
+ unescaped = INTERNET_MAX_URL_LENGTH; + lstrcpyW(urlW, L"%F0%9F%8D%B7"); + res = UrlUnescapeW(urlW, NULL, &unescaped, URL_UNESCAPE_AS_UTF8 | URL_UNESCAPE_INPLACE); + ok(res == S_OK, "Got %#lx.\n", res); + if (!wcscmp(urlW, L"\xf0\x9f\x8d\xb7")) + { + utf8_support = FALSE; + win_skip("Skip URL_UNESCAPE_AS_UTF8 tests for pre-win7 systems.\n"); + } + for (i = 0; i < ARRAYSIZE(TEST_URL_UNESCAPEW); i++) { + if (TEST_URL_UNESCAPEW[i].flags & URL_UNESCAPE_AS_UTF8 && !utf8_support) + continue; + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url);
memset(bufferW, 0xff, sizeof(bufferW));
From: Jactry Zeng <jzeng@codeweavers.com>
This fixes an issue when the path includes non-ASCII characters. --- dlls/mshtml/nsembed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/mshtml/nsembed.c b/dlls/mshtml/nsembed.c index 95c07c90390..ea20ffbd202 100644 --- a/dlls/mshtml/nsembed.c +++ b/dlls/mshtml/nsembed.c @@ -1304,7 +1304,7 @@ BOOL is_gecko_path(const char *path) *ptr = '/'; }
- UrlUnescapeW(buf, NULL, NULL, URL_UNESCAPE_INPLACE); + UrlUnescapeW(buf, NULL, NULL, URL_UNESCAPE_INPLACE | URL_UNESCAPE_AS_UTF8); buf[gecko_path_len] = 0;
ret = !wcsicmp(buf, gecko_path);
On Mon Aug 7 09:46:11 2023 +0000, Jacek Caban wrote:
At this point, for all you know, src[1], src[2] and src[3] may be null characters. The check below is too late; this memcpy potentially reads past the end of the buffer.
Hi Jacek,
Sorry for the noise; please just compare the latest version with version 7. I moved the verification of src[1], src[2] and src[3] to the beginning of the `for` loop.
This merge request was approved by Jacek Caban.