From: Paul Gofman pgofman@codeweavers.com
--- dlls/msvcrt/locale.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/dlls/msvcrt/locale.c b/dlls/msvcrt/locale.c index 5fdede174a0..6fc45b3bdbe 100644 --- a/dlls/msvcrt/locale.c +++ b/dlls/msvcrt/locale.c @@ -1292,7 +1292,7 @@ static pthreadlocinfo create_locinfo(int category, char buf[256]; BOOL sname_match; wchar_t wbuf[256], map_buf[256]; - int i; + int i, ret;
TRACE("(%d %s)\n", category, locale);
@@ -1514,9 +1514,13 @@ static pthreadlocinfo create_locinfo(int category,
MultiByteToWideChar(locinfo->lc_codepage, 0, buf, 256, wbuf, 256); LCMapStringW(LOCALE_INVARIANT, LCMAP_LOWERCASE, wbuf, 256, map_buf, 256); - WideCharToMultiByte(locinfo->lc_codepage, 0, map_buf, 256, (char *)locinfo->pclmap, 256, NULL, NULL); + if ((ret = WideCharToMultiByte(locinfo->lc_codepage, 0, map_buf, 256, + (char *)locinfo->pclmap, 256, NULL, NULL)) != 256) + FIXME("WideCharToMultiByte failed, ret %d, error %lu.\n", ret, GetLastError()); LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE, wbuf, 256, map_buf, 256); - WideCharToMultiByte(locinfo->lc_codepage, 0, map_buf, 256, (char *)locinfo->pcumap, 256, NULL, NULL); + if ((ret = WideCharToMultiByte(locinfo->lc_codepage, 0, map_buf, 256, + (char *)locinfo->pcumap, 256, NULL, NULL)) != 256) + FIXME("WideCharToMultiByte failed, ret %d, error %lu.\n", ret, GetLastError()); } else { locinfo->lc_clike = 1; locinfo->mb_cur_max = 1;
From: Paul Gofman pgofman@codeweavers.com
--- dlls/msvcrt/locale.c | 39 ++++++++++++++++++++++++-------------- dlls/ucrtbase/tests/file.c | 19 +++++++++++++++++++ 2 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/dlls/msvcrt/locale.c b/dlls/msvcrt/locale.c index 6fc45b3bdbe..df1013aff95 100644 --- a/dlls/msvcrt/locale.c +++ b/dlls/msvcrt/locale.c @@ -1292,7 +1292,7 @@ static pthreadlocinfo create_locinfo(int category, char buf[256]; BOOL sname_match; wchar_t wbuf[256], map_buf[256]; - int i, ret; + int i, ret, count;
TRACE("(%d %s)\n", category, locale);
@@ -1491,8 +1491,10 @@ static pthreadlocinfo create_locinfo(int category, locinfo->ctype1[0] = 0; locinfo->pctype = locinfo->ctype1+1;
+ count = locinfo->lc_codepage == CP_UTF8 ? 128 : 256; + buf[1] = buf[2] = '\0'; - for(i=1; i<257; i++) { + for(i = 1; i < count + 1; i++) { buf[0] = i-1;
MultiByteToWideChar(locinfo->lc_codepage, 0, buf, 1, wbuf, 1); @@ -1500,27 +1502,36 @@ static pthreadlocinfo create_locinfo(int category, locinfo->ctype1[i] = 0; GetStringTypeW(CT_CTYPE1, wbuf, 1, &locinfo->ctype1[i]); } + if (locinfo->lc_codepage == CP_UTF8) + { + for (; i < 257; ++i) + locinfo->ctype1[i] = (i >= 0xc3 && i <= 0xf5) ? _LEADBYTE : 0; + } + else + { + for(i=0; cp_info.LeadByte[i+1]!=0; i+=2) + for(j=cp_info.LeadByte[i]; j<=cp_info.LeadByte[i+1]; j++) + locinfo->ctype1[j+1] |= _LEADBYTE; + }
- for(i=0; cp_info.LeadByte[i+1]!=0; i+=2) - for(j=cp_info.LeadByte[i]; j<=cp_info.LeadByte[i+1]; j++) - locinfo->ctype1[j+1] |= _LEADBYTE; - - for(i=0; i<256; i++) { + for(i = 0; i < count; i++) { if(locinfo->pctype[i] & _LEADBYTE) buf[i] = ' '; else buf[i] = i; }
- MultiByteToWideChar(locinfo->lc_codepage, 0, buf, 256, wbuf, 256); - LCMapStringW(LOCALE_INVARIANT, LCMAP_LOWERCASE, wbuf, 256, map_buf, 256); - if ((ret = WideCharToMultiByte(locinfo->lc_codepage, 0, map_buf, 256, - (char *)locinfo->pclmap, 256, NULL, NULL)) != 256) + MultiByteToWideChar(locinfo->lc_codepage, 0, buf, count, wbuf, count); + LCMapStringW(LOCALE_INVARIANT, LCMAP_LOWERCASE, wbuf, count, map_buf, count); + if ((ret = WideCharToMultiByte(locinfo->lc_codepage, 0, map_buf, count, + (char *)locinfo->pclmap, count, NULL, NULL)) != count) FIXME("WideCharToMultiByte failed, ret %d, error %lu.\n", ret, GetLastError()); - LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE, wbuf, 256, map_buf, 256); - if ((ret = WideCharToMultiByte(locinfo->lc_codepage, 0, map_buf, 256, - (char *)locinfo->pcumap, 256, NULL, NULL)) != 256) + LCMapStringW(LOCALE_INVARIANT, LCMAP_UPPERCASE, wbuf, count, map_buf, count); + if ((ret = WideCharToMultiByte(locinfo->lc_codepage, 0, map_buf, count, + (char *)locinfo->pcumap, count, NULL, NULL)) != count) FIXME("WideCharToMultiByte failed, ret %d, error %lu.\n", ret, GetLastError()); + for (i = count; i < 256; ++i) + ((char *)locinfo->pclmap)[i] = ((char *)locinfo->pcumap)[i] = i; } else { locinfo->lc_clike = 1; locinfo->mb_cur_max = 1; diff --git a/dlls/ucrtbase/tests/file.c b/dlls/ucrtbase/tests/file.c index d5303a6020c..47971c76f95 100644 --- a/dlls/ucrtbase/tests/file.c +++ b/dlls/ucrtbase/tests/file.c @@ -232,6 +232,7 @@ static void test_utf8(const char *argv0) intptr_t hfind, hproc; WCHAR bufW[256], *pW; struct _stat64 stat; + unsigned int i; FILE *f; int ret;
@@ -241,6 +242,24 @@ static void test_utf8(const char *argv0) return; }
+ for (i = 128; i < 256; ++i) + { + unsigned int v; + + winetest_push_context("%#x", i); + v = tolower(i); + ok(i == v, "got %#x.\n", v); + v = toupper(i); + ok(i == v, "got %#x.\n", v); + + v = _isctype(i, ~0u); + if (i >= 0xc2 && i <= 0xf4) + ok(v == _LEADBYTE, "got %#x.\n", v); + else + ok(!v, "got %#x.\n", v); + winetest_pop_context(); + } + ret = _mkdir(dir); if (ret == -1 && errno == ENOENT) {
That, in particular, fixes a crash in Crysis 2 Remastered which has been happening since commit a0e2cbd46a7e629005ff306f032b02c263362775.
At least for a UTF-8 locale, WideCharToMultiByte() always fails in create_locinfo(). The code doesn't expect any failures there (leaving potentially uninitialized data), so I guess such a case warrants a FIXME (patch 1). Currently tolower() / toupper() for chars 128-255 in a UTF-8 locale will return some bogus characters instead of the unchanged input character.
Then, GetCPInfo(CP_UTF8, ...) doesn't report any LeadBytes (also on Windows) which I guess pretty much warrants special handling for utf8 in ucrtbase.
Does something like in attached patches work for you?
[0001-msvcrt-Print-FIXME-when-WideCharToMultiByte-fails-in.patch](/uploads/56d6b0ad6b6326079be88e963c5dbf57/0001-msvcrt-Print-FIXME-when-WideCharToMultiByte-fails-in.patch)
[0002-ucrtbase-Fix-CP_UTF8-handling-in-_toupper_l.patch](/uploads/6972e474ae498c53915abcf7d32541b5/0002-ucrtbase-Fix-CP_UTF8-handling-in-_toupper_l.patch)
[0003-ucrtbase-Fix-CP_UTF8-handling-in-_tolower_l.patch](/uploads/7bcb608dc4db056c1e7b3d1695eb5484/0003-ucrtbase-Fix-CP_UTF8-handling-in-_tolower_l.patch)
[0004-ucrtbase-Fix-case-mapping-and-ctype1-tables-for-utf8.patch](/uploads/7de0039c3d623c7d53792e5085add570/0004-ucrtbase-Fix-case-mapping-and-ctype1-tables-for-utf8.patch)
I can check (although it takes a bit of effort to check for sure with the game; it is not something immediately easy to confirm). But even if we stop depending on case map tables for chars >= 0x80 for UTF-8, don't we still want to do something about the failing WideCharToMultiByte? It now returns 0 in a UTF-8 locale, and that only works for characters < 0x80 due to an implementation detail: WideCharToMultiByte actually fills the output buffer up to the point of failure.
Nevermind this part, the failure is not triggered with _LEADBYTE correctly set.
Yes, that works (well, why wouldn't it, the functional effect is the same).
Can you push these patches to the MR? I prefer this solution since it makes create_locinfo simpler (and the function is already a hard-to-review mess). Also, depending on character case makes more sense in the tolower/toupper functions.