[PATCH v10 0/4] MR585: Improve is_gecko_path() for handling non-ASCII characters.

List overview All Threads

newer

older

[PATCH 0/1] MR3530: magnification:...

[PATCH v4 0/7] MR3506: Improve...

Jactry Zeng (＠jactry)

8 Aug 2023 8 Aug '23

4:20 a.m.

This fixes an issue when the path includes non-ASCII characters.

Signed-off-by: Jactry Zeng jzeng@codeweavers.com

-- v10: mshtml: Call UrlUnescapeW() with URL_UNESCAPE_AS_UTF8 in is_gecko_path(). shlwapi/tests: Test UrlUnescapeW() with URL_UNESCAPE_AS_UTF8. kernelbase: Implement URL_UNESCAPE_AS_UTF8 for UrlUnescapeW().

https://gitlab.winehq.org/wine/wine/-/merge_requests/585

Show replies by date

Jactry Zeng

8 Aug 8 Aug

4:20 a.m.

New subject: [PATCH v10 1/4] shlwapi/tests: Test UrlUnescapeW() with independent data.

From: Jactry Zeng jzeng@codeweavers.com

--- dlls/shlwapi/tests/url.c | 71 ++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/dlls/shlwapi/tests/url.c b/dlls/shlwapi/tests/url.c index 6870de4ee5f..3690bc1686c 100644 --- a/dlls/shlwapi/tests/url.c +++ b/dlls/shlwapi/tests/url.c @@ -407,7 +407,19 @@ static struct { {"file://%24%25foobar", "file://$%foobar"} };

-/* ################ */ +static struct +{ + const WCHAR *url; + const WCHAR *expect; + DWORD flags; +} TEST_URL_UNESCAPEW[] = +{ + { L"file://foo/bar", L"file://foo/bar" }, + { L"file://fo%20o%5Ca/bar", L"file://fo o\a/bar" }, + { L"file://%24%25foobar", L"file://$%foobar" }, + { L"file:///C:/Program Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program%20Files", L"file:///C:/Program Files" }, +};

static const struct { const char *path; @@ -1391,17 +1403,14 @@ static void test_UrlIs(void)

static void test_UrlUnescape(void) { + WCHAR urlW[INTERNET_MAX_URL_LENGTH], bufferW[INTERNET_MAX_URL_LENGTH]; CHAR szReturnUrl[INTERNET_MAX_URL_LENGTH]; - WCHAR ret_urlW[INTERNET_MAX_URL_LENGTH]; - WCHAR *urlW, *expected_urlW; - DWORD dwEscaped; - size_t i; + DWORD dwEscaped, unescaped; static char inplace[] = "file:///C:/Program%20Files"; static char another_inplace[] = "file:///C:/Program%20Files"; static const char expected[] = "file:///C:/Program Files"; - static WCHAR inplaceW[] = L"file:///C:/Program Files"; - static WCHAR another_inplaceW[] = L"file:///C:/Program%20Files"; HRESULT res; + int i;

for (i = 0; i < ARRAY_SIZE(TEST_URL_UNESCAPE); i++) { dwEscaped=INTERNET_MAX_URL_LENGTH; @@ -1418,21 +1427,30 @@ static void test_UrlUnescape(void) "UrlUnescapeA returned 0x%lx (expected E_INVALIDARG) for "%s"\n", res, TEST_URL_UNESCAPE[i].url); ok(strcmp(szReturnUrl,"")==0, "Expected empty string\n"); + }

- dwEscaped = INTERNET_MAX_URL_LENGTH; - urlW = GetWideString(TEST_URL_UNESCAPE[i].url); - expected_urlW = GetWideString(TEST_URL_UNESCAPE[i].expect); - res = UrlUnescapeW(urlW, ret_urlW, &dwEscaped, 0); - ok(res == S_OK, - "UrlUnescapeW returned 0x%lx (expected S_OK) for "%s"\n", - res, TEST_URL_UNESCAPE[i].url); - - WideCharToMultiByte(CP_ACP,0,ret_urlW,-1,szReturnUrl,INTERNET_MAX_URL_LENGTH,0,0); - ok(lstrcmpW(ret_urlW, expected_urlW)==0, - "Expected "%s", but got "%s" from "%s" flags %08lx\n", - TEST_URL_UNESCAPE[i].expect, szReturnUrl, TEST_URL_UNESCAPE[i].url, 0L); - FreeWideString(urlW); - FreeWideString(expected_urlW); + for (i = 0; i < ARRAYSIZE(TEST_URL_UNESCAPEW); i++) + { + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url); + + memset(bufferW, 0xff, sizeof(bufferW)); + unescaped = INTERNET_MAX_URL_LENGTH; + res = UrlUnescapeW(urlW, bufferW, &unescaped, TEST_URL_UNESCAPEW[i].flags); + ok(res == S_OK, "[%d]: returned %#lx.\n", i, res); + ok(unescaped == wcslen(TEST_URL_UNESCAPEW[i].expect), "[%d]: got unescaped %ld.\n", i, unescaped); + ok(!wcscmp(bufferW, TEST_URL_UNESCAPEW[i].expect), "[%d]: got result %s.\n", i, debugstr_w(bufferW)); + + /* Test with URL_UNESCAPE_INPLACE */ + unescaped = INTERNET_MAX_URL_LENGTH; + res = UrlUnescapeW(urlW, NULL, &unescaped, TEST_URL_UNESCAPEW[i].flags | URL_UNESCAPE_INPLACE); + ok(res == S_OK, "[%d]: returned %#lx.\n", i, res); + ok(unescaped == INTERNET_MAX_URL_LENGTH, "[%d]: got unescaped %ld.\n", i, unescaped); + ok(!wcscmp(urlW, TEST_URL_UNESCAPEW[i].expect), "[%d]: got result %s.\n", i, debugstr_w(urlW)); + + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url); + unescaped = wcslen(TEST_URL_UNESCAPEW[i].expect) - 1; + res = UrlUnescapeW(urlW, bufferW, &unescaped, TEST_URL_UNESCAPEW[i].flags); + ok(res == E_POINTER, "[%d]: returned %#lx.\n", i, res); }

dwEscaped = sizeof(inplace); @@ -1445,17 +1463,6 @@ static void test_UrlUnescape(void) res = UrlUnescapeA(another_inplace, NULL, NULL, URL_UNESCAPE_INPLACE); ok(res == S_OK, "UrlUnescapeA returned 0x%lx (expected S_OK)\n", res); ok(!strcmp(another_inplace, expected), "got %s expected %s\n", another_inplace, expected); - - dwEscaped = sizeof(inplaceW); - res = UrlUnescapeW(inplaceW, NULL, &dwEscaped, URL_UNESCAPE_INPLACE); - ok(res == S_OK, "UrlUnescapeW returned 0x%lx (expected S_OK)\n", res); - ok(dwEscaped == 50, "got %ld expected 50\n", dwEscaped); - - /* if we set the buffer pointer to NULL, the string apparently still gets converted (Google Lively does this) */ - res = UrlUnescapeW(another_inplaceW, NULL, NULL, URL_UNESCAPE_INPLACE); - ok(res == S_OK, "UrlUnescapeW returned 0x%lx (expected S_OK)\n", res); - - ok(lstrlenW(another_inplaceW) == 24, "got %d expected 24\n", lstrlenW(another_inplaceW)); }

static const struct parse_url_test_t {

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/585

Jactry Zeng

4:20 a.m.

New subject: [PATCH v10 2/4] kernelbase: Implement URL_UNESCAPE_AS_UTF8 for UrlUnescapeW().

From: Jactry Zeng jzeng@codeweavers.com

--- dlls/kernelbase/path.c | 58 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/dlls/kernelbase/path.c b/dlls/kernelbase/path.c index 7eda9bd483c..40ddf840045 100644 --- a/dlls/kernelbase/path.c +++ b/dlls/kernelbase/path.c @@ -2907,11 +2907,26 @@ HRESULT WINAPI UrlUnescapeA(char *url, char *unescaped, DWORD *unescaped_len, DW return hr; }

+static int get_utf8_len(unsigned char code) +{ + if (code < 0x80) + return 1; + else if ((code & 0xe0) == 0xc0) + return 2; + else if ((code & 0xf0) == 0xe0) + return 3; + else if ((code & 0xf8) == 0xf0) + return 4; + return 0; +} + HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len, DWORD flags) { + WCHAR *dst, next, utf16_buf[4]; BOOL stop_unescaping = FALSE; + int utf8_len, utf16_len, i; const WCHAR *src; - WCHAR *dst, next; + char utf8_buf[4]; DWORD needed; HRESULT hr;

@@ -2930,6 +2945,7 @@ HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len,

for (src = url, needed = 0; *src; src++, needed++) { + utf16_len = 0; if (flags & URL_DONT_UNESCAPE_EXTRA_INFO && (*src == '#' || *src == '?')) { stop_unescaping = TRUE; @@ -2939,17 +2955,53 @@ HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len, { INT ih; WCHAR buf[5] = L"0x"; + memcpy(buf + 2, src + 1, 2*sizeof(WCHAR)); buf[4] = 0; StrToIntExW(buf, STIF_SUPPORT_HEX, &ih); - next = (WCHAR) ih; src += 2; /* Advance to end of escape */ + + if (flags & URL_UNESCAPE_AS_UTF8) + { + utf8_buf[0] = ih; + utf8_len = get_utf8_len(ih); + for (i = 1; i < utf8_len && *(src + 1) == '%' && *(src + 2) && *(src + 3); i++) + { + memcpy(buf + 2, src + 2, 2 * sizeof(WCHAR)); + StrToIntExW(buf, STIF_SUPPORT_HEX, &ih); + /* Check if it is a valid continuation byte. */ + if ((ih & 0xc0) == 0x80) + { + utf8_buf[i] = ih; + src += 3; + } + else + break; + } + + utf16_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8_buf, i, utf16_buf, ARRAYSIZE(utf16_buf)); + if (utf16_len) + needed += utf16_len - 1; + else + next = 0xfffd; + } + else + next = (WCHAR) ih; } else next = *src;

if (flags & URL_UNESCAPE_INPLACE || needed < *unescaped_len) - *dst++ = next; + { + if (utf16_len) + { + memcpy(dst, utf16_buf, utf16_len * sizeof(*utf16_buf)); + dst += utf16_len; + } + else + *dst++ = next; + } }

if (flags & URL_UNESCAPE_INPLACE || needed < *unescaped_len)

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/585

Jactry Zeng

4:20 a.m.

New subject: [PATCH v10 3/4] shlwapi/tests: Test UrlUnescapeW() with URL_UNESCAPE_AS_UTF8.

From: Jactry Zeng jzeng@codeweavers.com

--- dlls/shlwapi/tests/url.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)

diff --git a/dlls/shlwapi/tests/url.c b/dlls/shlwapi/tests/url.c index 3690bc1686c..160f7696cc2 100644 --- a/dlls/shlwapi/tests/url.c +++ b/dlls/shlwapi/tests/url.c @@ -28,6 +28,7 @@ #include "shlwapi.h" #include "wininet.h" #include "intshcut.h" +#include "winternl.h"

static const char* TEST_URL_1 = "http://www.winehq.org/tests?date=10/10/1923"; static const char* TEST_URL_2 = "http://localhost:8080/tests%2e.html?date=Mon%2010/10/1923"; @@ -418,7 +419,20 @@ static struct { L"file://fo%20o%5Ca/bar", L"file://fo o\a/bar" }, { L"file://%24%25foobar", L"file://$%foobar" }, { L"file:///C:/Program Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program Files", L"file:///C:/Program Files", URL_UNESCAPE_AS_UTF8 }, { L"file:///C:/Program%20Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program%20Files", L"file:///C:/Program Files", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4%B8%AD%E6%96%87/bar", L"file://foo/\xe4\xb8\xad\xe6\x96\x87/bar" }, /* with 3 btyes utf-8 */ + { L"file://foo/%E4%B8%AD%E6%96%87/bar", L"file://foo/\x4e2d\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + /* mix corrupt and good utf-8 */ + { L"file://foo/%E4%AD%E6%96%87/bar", L"file://foo/\xfffd\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%F0%9F%8D%B7/bar", L"file://foo/\xf0\x9f\x8d\xb7/bar" }, /* with 4 btyes utf-8 */ + { L"file://foo/%F0%9F%8D%B7/bar", L"file://foo/\xd83c\xdf77/bar", URL_UNESCAPE_AS_UTF8 }, + /* non-escaped chars between multi-byte escaped chars */ + { L"file://foo/%E4%B8%ADabc%E6%96%87/bar", L"file://foo/\x4e2d""abc""\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4B8%AD/bar", L"file://foo/\xfffd""B8\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4%G8%AD/bar", L"file://foo/\xfffd""%G8\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%G4%B8%AD/bar", L"file://foo/%G4\xfffd\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, };

static const struct { @@ -1406,6 +1420,7 @@ static void test_UrlUnescape(void) WCHAR urlW[INTERNET_MAX_URL_LENGTH], bufferW[INTERNET_MAX_URL_LENGTH]; CHAR szReturnUrl[INTERNET_MAX_URL_LENGTH]; DWORD dwEscaped, unescaped; + BOOL utf8_support = TRUE; static char inplace[] = "file:///C:/Program%20Files"; static char another_inplace[] = "file:///C:/Program%20Files"; static const char expected[] = "file:///C:/Program Files"; @@ -1429,8 +1444,21 @@ static void test_UrlUnescape(void) ok(strcmp(szReturnUrl,"")==0, "Expected empty string\n"); }

+ unescaped = INTERNET_MAX_URL_LENGTH; + lstrcpyW(urlW, L"%F0%9F%8D%B7"); + res = UrlUnescapeW(urlW, NULL, &unescaped, URL_UNESCAPE_AS_UTF8 | URL_UNESCAPE_INPLACE); + ok(res == S_OK, "Got %#lx.\n", res); + if (!wcscmp(urlW, L"\xf0\x9f\x8d\xb7")) + { + utf8_support = FALSE; + win_skip("Skip URL_UNESCAPE_AS_UTF8 tests for pre-win7 systems.\n"); + } + for (i = 0; i < ARRAYSIZE(TEST_URL_UNESCAPEW); i++) { + if (TEST_URL_UNESCAPEW[i].flags & URL_UNESCAPE_AS_UTF8 && !utf8_support) + continue; + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url);

memset(bufferW, 0xff, sizeof(bufferW));

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/585

Jactry Zeng

4:20 a.m.

New subject: [PATCH v10 4/4] mshtml: Call UrlUnescapeW() with URL_UNESCAPE_AS_UTF8 in is_gecko_path().

From: Jactry Zeng jzeng@codeweavers.com

This fixes an issue when the path includes non-ASCII characters. --- dlls/mshtml/nsembed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dlls/mshtml/nsembed.c b/dlls/mshtml/nsembed.c index 95c07c90390..ea20ffbd202 100644 --- a/dlls/mshtml/nsembed.c +++ b/dlls/mshtml/nsembed.c @@ -1304,7 +1304,7 @@ BOOL is_gecko_path(const char *path) *ptr = '/'; }

- UrlUnescapeW(buf, NULL, NULL, URL_UNESCAPE_INPLACE); + UrlUnescapeW(buf, NULL, NULL, URL_UNESCAPE_INPLACE | URL_UNESCAPE_AS_UTF8); buf[gecko_path_len] = 0;

ret = !wcsicmp(buf, gecko_path);

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/585

Jactry Zeng (＠jactry)

4:37 a.m.

On Mon Aug 7 09:46:11 2023 +0000, Jacek Caban wrote:

...

At this point, for all you know, src[1], src[2] and src[3] may be null bytes. The check below is too late; this memcpy potentially reads bytes past the end of the buffer.

Hi Jacek,

Sorry for the noise, please just compare the latest version with version 7. I moved the verification of src[1], src[2] and src[3] to the beginning of the for loop.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/585#note_41582

Jacek Caban (＠jacek)

9:45 a.m.

New subject: [PATCH v10 0/4] MR585: Improve is_gecko_path() for handling non-ASCII characters. - approved

This merge request was approved by Jacek Caban.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/585

814

Age (days ago)

814

Last active (days ago)

wine-gitlab@winehq.org

6 comments

3 participants

tags (0)

participants (3)

Jacek Caban (＠jacek)
Jactry Zeng
Jactry Zeng (＠jactry)