Signed-off-by: Sergio Gómez Del Real <sdelreal@codeweavers.com> --- dlls/kernel32/locale.c | 66 ++++++++++- dlls/kernel32/tests/locale.c | 88 ++++++++++++++ dlls/kernel32/tests/normalization_tests.h | 190 ++++++++++++++++++++++++++++++ 3 files changed, 339 insertions(+), 5 deletions(-) create mode 100644 dlls/kernel32/tests/normalization_tests.h
diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c index 5a6ff35b75..7dbc132c07 100644 --- a/dlls/kernel32/locale.c +++ b/dlls/kernel32/locale.c @@ -5359,13 +5359,69 @@ INT WINAPI GetUserDefaultLocaleName(LPWSTR localename, int buffersize)
/****************************************************************************** * NormalizeString (KERNEL32.@) + * + * Normalizes a string according to a Unicode Normalization Form. + * + * PARAMS + * norm [I] Normalization Form + * src [I] Source string to normalize + * srclen [I] Length of source string (if -1, source string is null-terminated) + * dst [O] Buffer to write normalized source string (can be NULL) + * dstlen [I] Length of dst string (can be 0) + * + * RETURNS + * Success: If dstlen is 0, return size needed, else return size of normalized string. + * Failure: ret <= 0. Use GetLastError to determine error. */ -INT WINAPI NormalizeString(NORM_FORM NormForm, LPCWSTR lpSrcString, INT cwSrcLength, - LPWSTR lpDstString, INT cwDstLength) +INT WINAPI NormalizeString(NORM_FORM norm, LPCWSTR src, INT srclen, + LPWSTR dst, INT dstlen) { - FIXME("%x %p %d %p %d\n", NormForm, lpSrcString, cwSrcLength, lpDstString, cwDstLength); - SetLastError(ERROR_CALL_NOT_IMPLEMENTED); - return 0; + extern unsigned int wine_unicode_decompose_string( int compat, const WCHAR *src, + int srclen, WCHAR *dst, int dstlen ); + extern unsigned int unicode_canonical_composition( WCHAR *str, UINT strlen ); + extern void unicode_canon_order( WCHAR *str, int strlen ); + + WCHAR *decomp = NULL; + INT compat = 0; + UINT needed_len; + + if (norm == NormalizationKC || norm == NormalizationKD) compat++; + + if (srclen == -1) srclen = strlenW( src ) + 1; + + needed_len = wine_unicode_decompose_string( compat, src, srclen, NULL, 0 ); + if (norm == NormalizationC || norm == NormalizationKC) + { + decomp = HeapAlloc( GetProcessHeap(), 0, needed_len*sizeof(WCHAR)+1 ); + wine_unicode_decompose_string( compat, src, srclen, decomp, needed_len ); + unicode_canon_order( decomp, needed_len ); + needed_len = unicode_canonical_composition( decomp, needed_len ); + } + + if (dstlen < needed_len && dstlen > 0) + { + if (decomp) HeapFree(GetProcessHeap(), 0, decomp); + SetLastError(ERROR_INSUFFICIENT_BUFFER); + 
return -1; + } + else if (dstlen <= 0) + { + if (decomp) HeapFree(GetProcessHeap(), 0, decomp); + return needed_len; + } + + if (norm == NormalizationC || norm == NormalizationKC) + { + lstrcpynW( dst, decomp, needed_len ); + HeapFree(GetProcessHeap(), 0, decomp); + return needed_len; + } + else + { + int decomp_len = wine_unicode_decompose_string( compat, src, srclen, dst, needed_len ); + unicode_canon_order( dst, needed_len ); + return decomp_len; + } }
/****************************************************************************** diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c index a99763db92..8cc3b62730 100644 --- a/dlls/kernel32/tests/locale.c +++ b/dlls/kernel32/tests/locale.c @@ -36,6 +36,8 @@ #include "winerror.h" #include "winnls.h"
+#include "normalization_tests.h" + static const WCHAR upper_case[] = {'\t','J','U','S','T','!',' ','A',',',' ','T','E','S','T',';',' ','S','T','R','I','N','G',' ','1','/','*','+','-','.','\r','\n',0}; static const WCHAR lower_case[] = {'\t','j','u','s','t','!',' ','a',',',' ','t','e','s','t',';',' ','s','t','r','i','n','g',' ','1','/','*','+','-','.','\r','\n',0}; static const WCHAR title_case[] = {'\t','J','u','s','t','!',' ','A',',',' ','T','e','s','t',';',' ','S','t','r','i','n','g',' ','1','/','*','+','-','.','\r','\n',0}; @@ -106,6 +108,7 @@ static INT (WINAPI *pGetNumberFormatEx)(LPCWSTR, DWORD, LPCWSTR, const NUMBERFMT static INT (WINAPI *pFindNLSStringEx)(LPCWSTR, DWORD, LPCWSTR, INT, LPCWSTR, INT, LPINT, LPNLSVERSIONINFO, LPVOID, LPARAM); static LANGID (WINAPI *pSetThreadUILanguage)(LANGID); static LANGID (WINAPI *pGetThreadUILanguage)(VOID); +static INT (WINAPI *pNormalizeString)(NORM_FORM, LPCWSTR, INT, LPWSTR, INT);
static void InitFunctionPointers(void) { @@ -141,6 +144,7 @@ static void InitFunctionPointers(void) X(FindNLSStringEx); X(SetThreadUILanguage); X(GetThreadUILanguage); + X(NormalizeString);
mod = GetModuleHandleA("ntdll"); X(RtlUpcaseUnicodeChar); @@ -5470,6 +5474,89 @@ static void test_SetThreadUILanguage(void) "expected %d got %d\n", MAKELANGID(LANG_DUTCH, SUBLANG_DUTCH_BELGIAN), res); }
+static void test_NormalizeString(void) +{ + struct test_data_normal test_arr[] = + { + { part0_str1, part0_nfc1, part0_nfd1, part0_nfkc1, part0_nfkd1 }, + { part0_str2, part0_nfc2, part0_nfd2, part0_nfkc2, part0_nfkd2 }, + { part0_str3, part0_nfc3, part0_nfd3, part0_nfkc3, part0_nfkd3 }, + { part0_str4, part0_nfc4, part0_nfd4, part0_nfkc4, part0_nfkd4 }, + { part0_str5, part0_nfc5, part0_nfd5, part0_nfkc5, part0_nfkd5 }, + { part0_str6, part0_nfc6, part0_nfd6, part0_nfkc6, part0_nfkd6 }, + { part0_str8, part0_nfc8, part0_nfd8, part0_nfkc8, part0_nfkd8 }, + { part0_str9, part0_nfc9, part0_nfd9, part0_nfkc9, part0_nfkd9 }, + { part0_str10, part0_nfc10, part0_nfd10, part0_nfkc10, part0_nfkd10 }, + { part0_str11, part0_nfc11, part0_nfd11, part0_nfkc11, part0_nfkd11 }, + { part0_str12, part0_nfc12, part0_nfd12, part0_nfkc12, part0_nfkd12 }, + { part1_str1, part1_nfc1, part1_nfd1, part1_nfkc1, part1_nfkd1 }, + { part1_str2, part1_nfc2, part1_nfd2, part1_nfkc2, part1_nfkd2 }, + { part1_str3, part1_nfc3, part1_nfd3, part1_nfkc3, part1_nfkd3 }, + { part1_str4, part1_nfc4, part1_nfd4, part1_nfkc4, part1_nfkd4 }, + { part1_str5, part1_nfc5, part1_nfd5, part1_nfkc5, part1_nfkd5 }, + { part1_str6, part1_nfc6, part1_nfd6, part1_nfkc6, part1_nfkd6 }, + { part1_str7, part1_nfc7, part1_nfd7, part1_nfkc7, part1_nfkd7 }, + { part1_str8, part1_nfc8, part1_nfd8, part1_nfkc8, part1_nfkd8 }, + { part1_str9, part1_nfc9, part1_nfd9, part1_nfkc9, part1_nfkd9 }, + { part1_str10, part1_nfc10, part1_nfd10, part1_nfkc10, part1_nfkd10 }, + { part1_str11, part1_nfc11, part1_nfd11, part1_nfkc11, part1_nfkd11 }, + { 0 } + }; + + struct test_data_normal *ptest = test_arr; + + if (!pFindNLSStringEx) + { + win_skip("NormalizeString is not available.\n"); + return; + } + + while (ptest->str != 0) + { + WCHAR *dst; + int str_cmp; + int dstlen; + + dstlen = pNormalizeString( NormalizationD, ptest->str, -1, NULL, 0 ); + dst = HeapAlloc(GetProcessHeap(), 0, dstlen * sizeof(WCHAR) + 1); + dstlen = 
pNormalizeString( NormalizationD, ptest->str, -1, dst, dstlen ); + ok(dstlen == strlenW(ptest->nfd)+1, "Copied length differed: was %d, should be %d\n", + dstlen, strlenW(ptest->nfd)+1); + str_cmp = strncmpW(ptest->nfd, dst, dstlen + 1); + ok(str_cmp == 0, "NFD test failed: returned value was %d\n", str_cmp); + HeapFree(GetProcessHeap(), 0, dst); + + dstlen = pNormalizeString( NormalizationC, ptest->str, -1, NULL, 0 ); + dst = HeapAlloc(GetProcessHeap(), 0, dstlen * sizeof(WCHAR) + 1); + dstlen = pNormalizeString( NormalizationC, ptest->str, -1, dst, dstlen ); + ok(dstlen == strlenW(ptest->nfc)+1, "Copied length differed: was %d, should be %d\n", + dstlen, strlenW(ptest->nfc)+1); + str_cmp = strncmpW(ptest->nfc, dst, dstlen + 1); + ok(str_cmp == 0, "NFC test failed: returned value was %d\n", str_cmp); + HeapFree(GetProcessHeap(), 0, dst); + + dstlen = pNormalizeString( NormalizationKD, ptest->str, -1, NULL, 0 ); + dst = HeapAlloc(GetProcessHeap(), 0, dstlen * sizeof(WCHAR) + 1); + dstlen = pNormalizeString( NormalizationKD, ptest->str, -1, dst, dstlen ); + ok(dstlen == strlenW(ptest->nfkd)+1, "Copied length differed: was %d, should be %d\n", + dstlen, strlenW(ptest->nfkd)+1); + str_cmp = strncmpW(ptest->nfkd, dst, dstlen + 1); + ok(str_cmp == 0, "NFKD test failed: returned value was %d\n", str_cmp); + HeapFree(GetProcessHeap(), 0, dst); + + dstlen = pNormalizeString( NormalizationKC, ptest->str, -1, NULL, 0 ); + dst = HeapAlloc(GetProcessHeap(), 0, dstlen * sizeof(WCHAR) + 1); + dstlen = pNormalizeString( NormalizationKC, ptest->str, -1, dst, dstlen ); + ok(dstlen == strlenW(ptest->nfkc)+1, "Copied length differed: was %d, should be %d\n", + dstlen, strlenW(ptest->nfkc)+1); + str_cmp = strncmpW(ptest->nfkc, dst, dstlen + 1); + ok(str_cmp == 0, "NFKC test failed: returned value was %d\n", str_cmp); + HeapFree(GetProcessHeap(), 0, dst); + + ptest++; + } +} + START_TEST(locale) { InitFunctionPointers(); @@ -5518,6 +5605,7 @@ START_TEST(locale) 
test_GetUserPreferredUILanguages(); test_FindNLSStringEx(); test_SetThreadUILanguage(); + test_NormalizeString(); /* this requires collation table patch to make it MS compatible */ if (0) test_sorting(); } diff --git a/dlls/kernel32/tests/normalization_tests.h b/dlls/kernel32/tests/normalization_tests.h new file mode 100644 index 0000000000..2f435bab30 --- /dev/null +++ b/dlls/kernel32/tests/normalization_tests.h @@ -0,0 +1,190 @@ +/* + * Test data for use in normalization tests. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* part 0: specific cases */ +/* LATIN CAPITAL LETTER D WITH DOT ABOVE */ +static WCHAR part0_str1[] = {0x1e0a,0}; +static WCHAR *part0_nfc1 = part0_str1; +static WCHAR part0_nfd1[] = {0x0044,0x0307,0}; +static WCHAR *part0_nfkc1 = part0_str1; +static WCHAR *part0_nfkd1 = part0_nfd1; + +/* LATIN CAPITAL LETTER D, COMBINING DOT BELOW, COMBINING DOT ABOVE */ +static WCHAR part0_str2[] = {0x0044,0x0323,0x0307,0}; +static WCHAR part0_nfc2[] = {0x1e0c,0x0307,0}; +static WCHAR *part0_nfd2 = part0_str2; +static WCHAR *part0_nfkc2 = part0_nfc2; +static WCHAR *part0_nfkd2 = part0_str2; + +/* LATIN CAPITAL LETTER D, COMBINING HORN, COMBINING DOT BELOW, COMBINING DOT ABOVE */ +static WCHAR part0_str3[] = {0x0044,0x031b,0x0323,0x0307,0}; +static WCHAR part0_nfc3[] = {0x1e0c,0x031b,0x0307,0}; +static WCHAR *part0_nfd3 = part0_str3; +static WCHAR *part0_nfkc3 = part0_nfc3; +static WCHAR *part0_nfkd3 = part0_str3; + +/* LATIN CAPITAL LETTER D, COMBINING HORN, COMBINING DOT BELOW, COMBINING DOT ABOVE */ +static WCHAR part0_str4[] = {0x0044,0x031b,0x0323,0x0307,0}; +static WCHAR part0_nfc4[] = {0x1e0c,0x031b,0x0307,0}; +static WCHAR *part0_nfd4 = part0_str4; +static WCHAR *part0_nfkc4 = part0_nfc4; +static WCHAR *part0_nfkd4 = part0_str4; + +/* + * HEBREW ACCENT SEGOL, HEBREW POINT PATAH, HEBREW POINT DAGESH OR MAPIQ, + * HEBREW ACCENT MERKHA, HEBREW POINT SHEVA, HEBREW PUNCTUATION PASEQ, + * HEBREW MARK UPPER DOT, HEBREW ACCENT DEHI + */ +static WCHAR part0_str5[] = {0x0592,0x05B7,0x05BC,0x05A5,0x05B0,0x05C0,0x05C4,0x05AD,0}; +static WCHAR part0_nfc5[] = {0x05B0,0x05B7,0x05BC,0x05A5,0x0592,0x05C0,0x05AD,0x05C4,0}; +static WCHAR *part0_nfd5 = part0_nfc5; +static WCHAR *part0_nfkc5 = part0_nfc5; +static WCHAR *part0_nfkd5 = part0_nfc5; + +/* + * HEBREW 
POINT QAMATS, HEBREW POINT HOLAM, HEBREW POINT HATAF SEGOL, + * HEBREW ACCENT ETNAHTA, HEBREW PUNCTUATION SOF PASUQ, HEBREW POINT SHEVA, + * HEBREW ACCENT ILUY, HEBREW ACCENT QARNEY PARA + */ +static WCHAR part0_str6[] = {0x05B8,0x05B9,0x05B1,0x0591,0x05C3,0x05B0,0x05AC,0x059F,0}; +static WCHAR part0_nfc6[] = {0x05B1,0x05B8,0x05B9,0x0591,0x05C3,0x05B0,0x05AC,0x059F,0}; +static WCHAR *part0_nfd6 = part0_nfc6; +static WCHAR *part0_nfkc6 = part0_nfc6; +static WCHAR *part0_nfkd6 = part0_nfc6; + +/* LATIN CAPITAL LETTER D WITH DOT BELOW */ +static WCHAR part0_str8[] = {0x1E0C,0}; +static WCHAR *part0_nfc8 = part0_str8; +static WCHAR part0_nfd8[] = {0x0044,0x0323,0}; +static WCHAR *part0_nfkc8 = part0_str8; +static WCHAR *part0_nfkd8 = part0_nfd8; + +/* LATIN CAPITAL LETTER D WITH DOT ABOVE, COMBINING DOT BELOW */ +static WCHAR part0_str9[] = {0x1E0A,0x0323,0}; +static WCHAR part0_nfc9[] = {0x1E0C,0x0307,0}; +static WCHAR part0_nfd9[] = {0x0044,0x0323,0x0307,0}; +static WCHAR *part0_nfkc9 = part0_nfc9; +static WCHAR *part0_nfkd9 = part0_nfd9; + +/* LATIN CAPITAL LETTER D WITH DOT BELOW, COMBINING DOT ABOVE */ +static WCHAR part0_str10[] = {0x1E0C,0x0307,0}; +static WCHAR *part0_nfc10 = part0_str10; +static WCHAR part0_nfd10[] = {0x0044,0x0323,0x0307,0}; +static WCHAR *part0_nfkc10 = part0_str10; +static WCHAR *part0_nfkd10 = part0_nfd10; + +/* LATIN CAPITAL LETTER E WITH MACRON AND GRAVE, COMBINING MACRON */ +static WCHAR part0_str11[] = {0x1E14,0x0304,0}; +static WCHAR *part0_nfc11 = part0_str11; +static WCHAR part0_nfd11[] = {0x0045,0x0304,0x0300,0x0304,0}; +static WCHAR *part0_nfkc11 = part0_str11; +static WCHAR *part0_nfkd11 = part0_nfd11; + +/* LATIN CAPITAL LETTER E WITH MACRON, COMBINING GRAVE ACCENT */ +static WCHAR part0_str12[] = {0x0112,0x0300,0}; +static WCHAR part0_nfc12[] = {0x1E14,0}; +static WCHAR part0_nfd12[] = {0x0045,0x0304,0x0300,0}; +static WCHAR *part0_nfkc12 = part0_nfc12; +static WCHAR *part0_nfkd12 = part0_nfd12; + +/* part 1: character by 
character */ +/* DIAERESIS */ +static WCHAR part1_str1[] = {0x00a8,0}; +static WCHAR *part1_nfc1 = part1_str1; +static WCHAR *part1_nfd1 = part1_str1; +static WCHAR part1_nfkc1[] = {0x0020,0x0308,0}; +static WCHAR *part1_nfkd1 = part1_nfkc1; + +/* VULGAR FRACTION ONE QUARTER */ +static WCHAR part1_str2[] = {0x00bc,0}; +static WCHAR *part1_nfc2 = part1_str2; +static WCHAR *part1_nfd2 = part1_str2; +static WCHAR part1_nfkc2[] = {0x0031,0x2044,0x0034,0}; +static WCHAR *part1_nfkd2 = part1_nfkc2; + +/* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */ +static WCHAR part1_str3[] = {0x00ca,0}; +static WCHAR *part1_nfc3 = part1_str3; +static WCHAR part1_nfd3[] = {0x0045,0x0302,0}; +static WCHAR *part1_nfkc3 = part1_str3; +static WCHAR *part1_nfkd3 = part1_nfd3; + +/* MODIFIER LETTER SMALL GAMMA */ +static WCHAR part1_str4[] = {0x02e0,0}; +static WCHAR *part1_nfc4 = part1_str4; +static WCHAR *part1_nfd4 = part1_str4; +static WCHAR part1_nfkc4[] = {0x0263,0}; +static WCHAR *part1_nfkd4 = part1_nfkc4; + +/* CYRILLIC CAPITAL LETTER IE WITH GRAVE */ +static WCHAR part1_str5[] = {0x0400,0}; +static WCHAR *part1_nfc5 = part1_str5; +static WCHAR part1_nfd5[] = {0x0415,0x0300,0}; +static WCHAR *part1_nfkc5 = part1_str5; +static WCHAR *part1_nfkd5 = part1_nfd5; + +/* CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT */ +static WCHAR part1_str6[] = {0x0476,0}; +static WCHAR *part1_nfc6 = part1_str6; +static WCHAR part1_nfd6[] = {0x0474,0x030F,0}; +static WCHAR *part1_nfkc6 = part1_str6; +static WCHAR *part1_nfkd6 = part1_nfd6; + +/* ARABIC LIGATURE HAH WITH JEEM INITIAL FORM */ +static WCHAR part1_str7[] = {0xFCA9,0}; +static WCHAR *part1_nfc7 = part1_str7; +static WCHAR *part1_nfd7 = part1_str7; +static WCHAR part1_nfkc7[] = {0x062D,0x062C,0}; +static WCHAR *part1_nfkd7 = part1_nfkc7; + +/* GREEK SMALL LETTER OMICRON WITH PSILI AND VARIA */ +static WCHAR part1_str8[] = {0x1F42,0}; +static WCHAR *part1_nfc8 = part1_str8; +static WCHAR part1_nfd8[] = {0x03BF,0x0313,0x0300,0}; +static 
WCHAR *part1_nfkc8 = part1_str8; +static WCHAR *part1_nfkd8 = part1_nfd8; + +/* GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI */ +static WCHAR part1_str9[] = {0x1F82,0}; +static WCHAR *part1_nfc9 = part1_str9; +static WCHAR part1_nfd9[] = {0x03B1,0x0313,0x0300,0x0345,0}; +static WCHAR *part1_nfkc9 = part1_str9; +static WCHAR *part1_nfkd9 = part1_nfd9; + +/* QUADRUPLE PRIME */ +static WCHAR part1_str10[] = {0x2057,0}; +static WCHAR *part1_nfc10 = part1_str10; +static WCHAR *part1_nfd10 = part1_str10; +static WCHAR part1_nfkc10[] = {0x2032,0x2032,0x2032,0x2032,0}; +static WCHAR *part1_nfkd10 = part1_nfkc10; + +/* KATAKANA-HIRAGANA VOICED SOUND MARK */ +static WCHAR part1_str11[] = {0x309B,0}; +static WCHAR *part1_nfc11 = part1_str11; +static WCHAR *part1_nfd11 = part1_str11; +static WCHAR part1_nfkc11[] = {0x20,0x3099,0}; +static WCHAR *part1_nfkd11 = part1_nfkc11; + +struct test_data_normal { + WCHAR *str; + WCHAR *nfc; + WCHAR *nfd; + WCHAR *nfkc; + WCHAR *nfkd; + UINT exp_dstlen; +};