This fixes an issue when the path includes non-ASCII characters.
Signed-off-by: Jactry Zeng <jzeng@codeweavers.com>
-- v9: mshtml: Call UrlUnescapeW() with URL_UNESCAPE_AS_UTF8 in is_gecko_path(). shlwapi/tests: Test UrlUnescapeW() with URL_UNESCAPE_AS_UTF8. kernelbase: Implement URL_UNESCAPE_AS_UTF8 for UrlUnescapeW().
From: Jactry Zeng <jzeng@codeweavers.com>
--- dlls/shlwapi/tests/url.c | 71 ++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 32 deletions(-)
diff --git a/dlls/shlwapi/tests/url.c b/dlls/shlwapi/tests/url.c index 6870de4ee5f..3690bc1686c 100644 --- a/dlls/shlwapi/tests/url.c +++ b/dlls/shlwapi/tests/url.c @@ -407,7 +407,19 @@ static struct { {"file://%24%25foobar", "file://$%foobar"} };
-/* ################ */ +static struct +{ + const WCHAR *url; + const WCHAR *expect; + DWORD flags; +} TEST_URL_UNESCAPEW[] = +{ + { L"file://foo/bar", L"file://foo/bar" }, + { L"file://fo%20o%5Ca/bar", L"file://fo o\\a/bar" }, + { L"file://%24%25foobar", L"file://$%foobar" }, + { L"file:///C:/Program Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program%20Files", L"file:///C:/Program Files" }, +};
static const struct { const char *path; @@ -1391,17 +1403,14 @@ static void test_UrlIs(void)
static void test_UrlUnescape(void) { + WCHAR urlW[INTERNET_MAX_URL_LENGTH], bufferW[INTERNET_MAX_URL_LENGTH]; CHAR szReturnUrl[INTERNET_MAX_URL_LENGTH]; - WCHAR ret_urlW[INTERNET_MAX_URL_LENGTH]; - WCHAR *urlW, *expected_urlW; - DWORD dwEscaped; - size_t i; + DWORD dwEscaped, unescaped; static char inplace[] = "file:///C:/Program%20Files"; static char another_inplace[] = "file:///C:/Program%20Files"; static const char expected[] = "file:///C:/Program Files"; - static WCHAR inplaceW[] = L"file:///C:/Program Files"; - static WCHAR another_inplaceW[] = L"file:///C:/Program%20Files"; HRESULT res; + int i;
for (i = 0; i < ARRAY_SIZE(TEST_URL_UNESCAPE); i++) { dwEscaped=INTERNET_MAX_URL_LENGTH; @@ -1418,21 +1427,30 @@ static void test_UrlUnescape(void) "UrlUnescapeA returned 0x%lx (expected E_INVALIDARG) for \"%s\"\n", res, TEST_URL_UNESCAPE[i].url); ok(strcmp(szReturnUrl,"")==0, "Expected empty string\n"); + }
- dwEscaped = INTERNET_MAX_URL_LENGTH; - urlW = GetWideString(TEST_URL_UNESCAPE[i].url); - expected_urlW = GetWideString(TEST_URL_UNESCAPE[i].expect); - res = UrlUnescapeW(urlW, ret_urlW, &dwEscaped, 0); - ok(res == S_OK, - "UrlUnescapeW returned 0x%lx (expected S_OK) for \"%s\"\n", - res, TEST_URL_UNESCAPE[i].url); - - WideCharToMultiByte(CP_ACP,0,ret_urlW,-1,szReturnUrl,INTERNET_MAX_URL_LENGTH,0,0); - ok(lstrcmpW(ret_urlW, expected_urlW)==0, - "Expected \"%s\", but got \"%s\" from \"%s\" flags %08lx\n", - TEST_URL_UNESCAPE[i].expect, szReturnUrl, TEST_URL_UNESCAPE[i].url, 0L); - FreeWideString(urlW); - FreeWideString(expected_urlW); + for (i = 0; i < ARRAYSIZE(TEST_URL_UNESCAPEW); i++) + { + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url); + + memset(bufferW, 0xff, sizeof(bufferW)); + unescaped = INTERNET_MAX_URL_LENGTH; + res = UrlUnescapeW(urlW, bufferW, &unescaped, TEST_URL_UNESCAPEW[i].flags); + ok(res == S_OK, "[%d]: returned %#lx.\n", i, res); + ok(unescaped == wcslen(TEST_URL_UNESCAPEW[i].expect), "[%d]: got unescaped %ld.\n", i, unescaped); + ok(!wcscmp(bufferW, TEST_URL_UNESCAPEW[i].expect), "[%d]: got result %s.\n", i, debugstr_w(bufferW)); + + /* Test with URL_UNESCAPE_INPLACE */ + unescaped = INTERNET_MAX_URL_LENGTH; + res = UrlUnescapeW(urlW, NULL, &unescaped, TEST_URL_UNESCAPEW[i].flags | URL_UNESCAPE_INPLACE); + ok(res == S_OK, "[%d]: returned %#lx.\n", i, res); + ok(unescaped == INTERNET_MAX_URL_LENGTH, "[%d]: got unescaped %ld.\n", i, unescaped); + ok(!wcscmp(urlW, TEST_URL_UNESCAPEW[i].expect), "[%d]: got result %s.\n", i, debugstr_w(urlW)); + + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url); + unescaped = wcslen(TEST_URL_UNESCAPEW[i].expect) - 1; + res = UrlUnescapeW(urlW, bufferW, &unescaped, TEST_URL_UNESCAPEW[i].flags); + ok(res == E_POINTER, "[%d]: returned %#lx.\n", i, res); }
dwEscaped = sizeof(inplace); @@ -1445,17 +1463,6 @@ static void test_UrlUnescape(void) res = UrlUnescapeA(another_inplace, NULL, NULL, URL_UNESCAPE_INPLACE); ok(res == S_OK, "UrlUnescapeA returned 0x%lx (expected S_OK)\n", res); ok(!strcmp(another_inplace, expected), "got %s expected %s\n", another_inplace, expected); - - dwEscaped = sizeof(inplaceW); - res = UrlUnescapeW(inplaceW, NULL, &dwEscaped, URL_UNESCAPE_INPLACE); - ok(res == S_OK, "UrlUnescapeW returned 0x%lx (expected S_OK)\n", res); - ok(dwEscaped == 50, "got %ld expected 50\n", dwEscaped); - - /* if we set the buffer pointer to NULL, the string apparently still gets converted (Google Lively does this) */ - res = UrlUnescapeW(another_inplaceW, NULL, NULL, URL_UNESCAPE_INPLACE); - ok(res == S_OK, "UrlUnescapeW returned 0x%lx (expected S_OK)\n", res); - - ok(lstrlenW(another_inplaceW) == 24, "got %d expected 24\n", lstrlenW(another_inplaceW)); }
static const struct parse_url_test_t {
From: Jactry Zeng <jzeng@codeweavers.com>
--- dlls/kernelbase/path.c | 58 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-)
diff --git a/dlls/kernelbase/path.c b/dlls/kernelbase/path.c index 7eda9bd483c..b2cf82ef617 100644 --- a/dlls/kernelbase/path.c +++ b/dlls/kernelbase/path.c @@ -2907,11 +2907,26 @@ HRESULT WINAPI UrlUnescapeA(char *url, char *unescaped, DWORD *unescaped_len, DW return hr; }
+static int get_utf8_len(unsigned char code) +{ + if (code < 0x80) + return 1; + else if ((code & 0xe0) == 0xc0) + return 2; + else if ((code & 0xf0) == 0xe0) + return 3; + else if ((code & 0xf8) == 0xf0) + return 4; + return 0; +} + HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len, DWORD flags) { + WCHAR *dst, next, utf16_buf[4]; BOOL stop_unescaping = FALSE; + int utf8_len, utf16_len, i; const WCHAR *src; - WCHAR *dst, next; + char utf8_buf[4]; DWORD needed; HRESULT hr;
@@ -2930,6 +2945,7 @@ HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len,
for (src = url, needed = 0; *src; src++, needed++) { + utf16_len = 0; if (flags & URL_DONT_UNESCAPE_EXTRA_INFO && (*src == '#' || *src == '?')) { stop_unescaping = TRUE; @@ -2939,17 +2955,53 @@ HRESULT WINAPI UrlUnescapeW(WCHAR *url, WCHAR *unescaped, DWORD *unescaped_len, { INT ih; WCHAR buf[5] = L"0x"; + memcpy(buf + 2, src + 1, 2*sizeof(WCHAR)); buf[4] = 0; StrToIntExW(buf, STIF_SUPPORT_HEX, &ih); - next = (WCHAR) ih; src += 2; /* Advance to end of escape */ + + if (flags & URL_UNESCAPE_AS_UTF8) + { + utf8_buf[0] = ih; + utf8_len = get_utf8_len(ih); + for (i = 1; i < utf8_len && *(src + 1) == '%'; i++) + { + memcpy(buf + 2, src + 2, 2 * sizeof(WCHAR)); + StrToIntExW(buf, STIF_SUPPORT_HEX, &ih); + /* Check if it is a valid continuation byte. */ + if ((ih & 0xc0) == 0x80) + { + utf8_buf[i] = ih; + src += 3; + } + else + break; + } + + utf16_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8_buf, i, utf16_buf, ARRAYSIZE(utf16_buf)); + if (utf16_len) + needed += utf16_len - 1; + else + next = 0xfffd; + } + else + next = (WCHAR) ih; } else next = *src;
if (flags & URL_UNESCAPE_INPLACE || needed < *unescaped_len) - *dst++ = next; + { + if (utf16_len) + { + memcpy(dst, utf16_buf, utf16_len * sizeof(*utf16_buf)); + dst += utf16_len; + } + else + *dst++ = next; + } }
if (flags & URL_UNESCAPE_INPLACE || needed < *unescaped_len)
From: Jactry Zeng <jzeng@codeweavers.com>
--- dlls/shlwapi/tests/url.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/dlls/shlwapi/tests/url.c b/dlls/shlwapi/tests/url.c index 3690bc1686c..160f7696cc2 100644 --- a/dlls/shlwapi/tests/url.c +++ b/dlls/shlwapi/tests/url.c @@ -28,6 +28,7 @@ #include "shlwapi.h" #include "wininet.h" #include "intshcut.h" +#include "winternl.h"
static const char* TEST_URL_1 = "http://www.winehq.org/tests?date=10/10/1923"; static const char* TEST_URL_2 = "http://localhost:8080/tests%2e.html?date=Mon%2010/10/1923"; @@ -418,7 +419,20 @@ static struct { L"file://fo%20o%5Ca/bar", L"file://fo o\\a/bar" }, { L"file://%24%25foobar", L"file://$%foobar" }, { L"file:///C:/Program Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program Files", L"file:///C:/Program Files", URL_UNESCAPE_AS_UTF8 }, { L"file:///C:/Program%20Files", L"file:///C:/Program Files" }, + { L"file:///C:/Program%20Files", L"file:///C:/Program Files", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4%B8%AD%E6%96%87/bar", L"file://foo/\xe4\xb8\xad\xe6\x96\x87/bar" }, /* with 3 bytes utf-8 */ + { L"file://foo/%E4%B8%AD%E6%96%87/bar", L"file://foo/\x4e2d\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + /* mix corrupt and good utf-8 */ + { L"file://foo/%E4%AD%E6%96%87/bar", L"file://foo/\xfffd\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%F0%9F%8D%B7/bar", L"file://foo/\xf0\x9f\x8d\xb7/bar" }, /* with 4 bytes utf-8 */ + { L"file://foo/%F0%9F%8D%B7/bar", L"file://foo/\xd83c\xdf77/bar", URL_UNESCAPE_AS_UTF8 }, + /* non-escaped chars between multi-byte escaped chars */ + { L"file://foo/%E4%B8%ADabc%E6%96%87/bar", L"file://foo/\x4e2d""abc""\x6587/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4B8%AD/bar", L"file://foo/\xfffd""B8\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%E4%G8%AD/bar", L"file://foo/\xfffd""%G8\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, + { L"file://foo/%G4%B8%AD/bar", L"file://foo/%G4\xfffd\xfffd/bar", URL_UNESCAPE_AS_UTF8 }, };
static const struct { @@ -1406,6 +1420,7 @@ static void test_UrlUnescape(void) WCHAR urlW[INTERNET_MAX_URL_LENGTH], bufferW[INTERNET_MAX_URL_LENGTH]; CHAR szReturnUrl[INTERNET_MAX_URL_LENGTH]; DWORD dwEscaped, unescaped; + BOOL utf8_support = TRUE; static char inplace[] = "file:///C:/Program%20Files"; static char another_inplace[] = "file:///C:/Program%20Files"; static const char expected[] = "file:///C:/Program Files"; @@ -1429,8 +1444,21 @@ static void test_UrlUnescape(void) ok(strcmp(szReturnUrl,"")==0, "Expected empty string\n"); }
+ unescaped = INTERNET_MAX_URL_LENGTH; + lstrcpyW(urlW, L"%F0%9F%8D%B7"); + res = UrlUnescapeW(urlW, NULL, &unescaped, URL_UNESCAPE_AS_UTF8 | URL_UNESCAPE_INPLACE); + ok(res == S_OK, "Got %#lx.\n", res); + if (!wcscmp(urlW, L"\xf0\x9f\x8d\xb7")) + { + utf8_support = FALSE; + win_skip("Skip URL_UNESCAPE_AS_UTF8 tests for pre-win7 systems.\n"); + } + for (i = 0; i < ARRAYSIZE(TEST_URL_UNESCAPEW); i++) { + if (TEST_URL_UNESCAPEW[i].flags & URL_UNESCAPE_AS_UTF8 && !utf8_support) + continue; + lstrcpyW(urlW, TEST_URL_UNESCAPEW[i].url);
memset(bufferW, 0xff, sizeof(bufferW));
From: Jactry Zeng <jzeng@codeweavers.com>
This fixes an issue when the path includes non-ASCII characters. --- dlls/mshtml/nsembed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/mshtml/nsembed.c b/dlls/mshtml/nsembed.c index 95c07c90390..ea20ffbd202 100644 --- a/dlls/mshtml/nsembed.c +++ b/dlls/mshtml/nsembed.c @@ -1304,7 +1304,7 @@ BOOL is_gecko_path(const char *path) *ptr = '/'; }
- UrlUnescapeW(buf, NULL, NULL, URL_UNESCAPE_INPLACE); + UrlUnescapeW(buf, NULL, NULL, URL_UNESCAPE_INPLACE | URL_UNESCAPE_AS_UTF8); buf[gecko_path_len] = 0;
ret = !wcsicmp(buf, gecko_path);
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=135761
Your paranoid android.
=== debian11b (64 bit WoW report) ===
ntdll: exception.c:3256: Test failed: B0 flag is not set in Dr6 exception.c:3257: Test failed: BS flag is set in Dr6