Look up the locinfo once per call, instead of calling MSVCRT__towlower_l, which queries the current locinfo with a TLS call, on every character.
Signed-off-by: Rémi Bernon rbernon@codeweavers.com ---
Since we're now using msvcrt wcsicmp in gdi32, this starts to show up high on prefix startup perf profiles.
This brings a roughly 300ms improvement in prefix startup time (for instance, the total execution time of "wine cmd /c exit" goes from 1.7s to 1.4s on average).
dlls/msvcrt/wcs.c | 55 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 15 deletions(-)
diff --git a/dlls/msvcrt/wcs.c b/dlls/msvcrt/wcs.c index 858ecbd7ed0..ffcc7d11926 100644 --- a/dlls/msvcrt/wcs.c +++ b/dlls/msvcrt/wcs.c @@ -85,19 +85,10 @@ MSVCRT_wchar_t* CDECL MSVCRT__wcsdup( const MSVCRT_wchar_t* str ) return ret; }
-/********************************************************************* - * _towlower_l (MSVCRT.@) - */ -int CDECL MSVCRT__towlower_l(MSVCRT_wint_t c, MSVCRT__locale_t locale) +static inline int msvcrt_towlower_internal(MSVCRT_wint_t c, MSVCRT_pthreadlocinfo locinfo) { - MSVCRT_pthreadlocinfo locinfo; MSVCRT_wchar_t ret;
- if(!locale) - locinfo = get_locinfo(); - else - locinfo = locale->locinfo; - if(!locinfo->lc_handle[MSVCRT_LC_CTYPE]) { if(c >= 'A' && c <= 'Z') return c + 'a' - 'A'; @@ -109,6 +100,21 @@ int CDECL MSVCRT__towlower_l(MSVCRT_wint_t c, MSVCRT__locale_t locale) return ret; }
+/********************************************************************* + * _towlower_l (MSVCRT.@) + */ +int CDECL MSVCRT__towlower_l(MSVCRT_wint_t c, MSVCRT__locale_t locale) +{ + MSVCRT_pthreadlocinfo locinfo; + + if(!locale) + locinfo = get_locinfo(); + else + locinfo = locale->locinfo; + + return msvcrt_towlower_internal(c, locinfo); +} + /********************************************************************* * towlower (MSVCRT.@) */ @@ -119,15 +125,21 @@ int CDECL MSVCRT_towlower(MSVCRT_wint_t c)
INT CDECL MSVCRT__wcsicmp_l(const MSVCRT_wchar_t *str1, const MSVCRT_wchar_t *str2, MSVCRT__locale_t locale) { + MSVCRT_pthreadlocinfo locinfo; MSVCRT_wchar_t c1, c2;
if(!MSVCRT_CHECK_PMT(str1 != NULL) || !MSVCRT_CHECK_PMT(str2 != NULL)) return MSVCRT__NLSCMPERROR;
+ if(!locale) + locinfo = get_locinfo(); + else + locinfo = locale->locinfo; + do { - c1 = MSVCRT__towlower_l(*str1++, locale); - c2 = MSVCRT__towlower_l(*str2++, locale); + c1 = msvcrt_towlower_internal(*str1++, locinfo); + c2 = msvcrt_towlower_internal(*str2++, locinfo); } while(c1 && (c1 == c2)); return c1 - c2; } @@ -146,6 +158,7 @@ INT CDECL MSVCRT__wcsicmp( const MSVCRT_wchar_t* str1, const MSVCRT_wchar_t* str INT CDECL MSVCRT__wcsnicmp_l(const MSVCRT_wchar_t *str1, const MSVCRT_wchar_t *str2, MSVCRT_size_t n, MSVCRT__locale_t locale) { + MSVCRT_pthreadlocinfo locinfo; MSVCRT_wchar_t c1, c2;
if (!n) @@ -154,10 +167,15 @@ INT CDECL MSVCRT__wcsnicmp_l(const MSVCRT_wchar_t *str1, const MSVCRT_wchar_t *s if(!MSVCRT_CHECK_PMT(str1 != NULL) || !MSVCRT_CHECK_PMT(str2 != NULL)) return MSVCRT__NLSCMPERROR;
+ if(!locale) + locinfo = get_locinfo(); + else + locinfo = locale->locinfo; + do { - c1 = MSVCRT__towlower_l(*str1++, locale); - c2 = MSVCRT__towlower_l(*str2++, locale); + c1 = msvcrt_towlower_internal(*str1++, locinfo); + c2 = msvcrt_towlower_internal(*str2++, locinfo); } while(--n && c1 && (c1 == c2)); return c1 - c2; } @@ -343,6 +361,7 @@ MSVCRT_wchar_t* CDECL MSVCRT__wcsset( MSVCRT_wchar_t* str, MSVCRT_wchar_t c ) int CDECL MSVCRT__wcsupr_s_l( MSVCRT_wchar_t* str, MSVCRT_size_t n, MSVCRT__locale_t locale ) { + MSVCRT_pthreadlocinfo locinfo; MSVCRT_wchar_t* ptr = str;
if (!str || !n) @@ -397,6 +416,7 @@ MSVCRT_wchar_t* CDECL MSVCRT__wcsupr( MSVCRT_wchar_t *str ) */ int CDECL MSVCRT__wcslwr_s_l( MSVCRT_wchar_t* str, MSVCRT_size_t n, MSVCRT__locale_t locale ) { + MSVCRT_pthreadlocinfo locinfo; MSVCRT_wchar_t* ptr = str;
if (!str || !n) @@ -406,10 +426,15 @@ int CDECL MSVCRT__wcslwr_s_l( MSVCRT_wchar_t* str, MSVCRT_size_t n, MSVCRT__loca return MSVCRT_EINVAL; }
+ if(!locale) + locinfo = get_locinfo(); + else + locinfo = locale->locinfo; + while (n--) { if (!*ptr) return 0; - *ptr = MSVCRT__towlower_l(*ptr, locale); + *ptr = msvcrt_towlower_internal(*ptr, locinfo); ptr++; }
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/msvcrt/wcs.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/dlls/msvcrt/wcs.c b/dlls/msvcrt/wcs.c index ffcc7d11926..6d0e43a722e 100644 --- a/dlls/msvcrt/wcs.c +++ b/dlls/msvcrt/wcs.c @@ -100,6 +100,21 @@ static inline int msvcrt_towlower_internal(MSVCRT_wint_t c, MSVCRT_pthreadlocinf return ret; }
+static inline int msvcrt_towupper_internal(MSVCRT_wint_t c, MSVCRT_pthreadlocinfo locinfo) +{ + MSVCRT_wchar_t ret; + + if(!locinfo->lc_handle[MSVCRT_LC_CTYPE]) { + if(c >= 'a' && c <= 'z') + return c + 'A' - 'a'; + return c; + } + + if(!LCMapStringW(locinfo->lc_handle[MSVCRT_LC_CTYPE], LCMAP_UPPERCASE, &c, 1, &ret, 1)) + return c; + return ret; +} + /********************************************************************* * _towlower_l (MSVCRT.@) */ @@ -371,10 +386,15 @@ int CDECL MSVCRT__wcsupr_s_l( MSVCRT_wchar_t* str, MSVCRT_size_t n, return MSVCRT_EINVAL; }
+ if(!locale) + locinfo = get_locinfo(); + else + locinfo = locale->locinfo; + while (n--) { if (!*ptr) return 0; - *ptr = MSVCRT__towupper_l(*ptr, locale); + *ptr = msvcrt_towupper_internal(*ptr, locinfo); ptr++; }
@@ -2779,22 +2799,13 @@ MSVCRT_size_t CDECL MSVCRT_wcsnlen(const MSVCRT_wchar_t *s, MSVCRT_size_t maxlen int CDECL MSVCRT__towupper_l(MSVCRT_wint_t c, MSVCRT__locale_t locale) { MSVCRT_pthreadlocinfo locinfo; - MSVCRT_wchar_t ret;
if(!locale) locinfo = get_locinfo(); else locinfo = locale->locinfo;
- if(!locinfo->lc_handle[MSVCRT_LC_CTYPE]) { - if(c >= 'a' && c <= 'z') - return c + 'A' - 'a'; - return c; - } - - if(!LCMapStringW(locinfo->lc_handle[MSVCRT_LC_CTYPE], LCMAP_UPPERCASE, &c, 1, &ret, 1)) - return c; - return ret; + return msvcrt_towupper_internal(c, locinfo); }
/*********************************************************************
Hi Rémi,
I think it would be better to fix it by creating a temporary locale_t object when NULL is passed. This way we can avoid adding multiple internal functions (it will also make it possible to reuse it in more functions in the future). _get_current_locale is buggy, but it should be good enough in this case (you can also introduce get_current_locale_noalloc and free_locale_noalloc, which avoid allocation, to speed things up).
Thanks, Piotr