--- dlls/kernel32/tests/locale.c | 106 ++++++++ dlls/kernelbase/locale.c | 466 ++++++++++++++++++++++++++--------- 2 files changed, 449 insertions(+), 123 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c index 4c1e1b4d73..a451594b19 100644 --- a/dlls/kernel32/tests/locale.c +++ b/dlls/kernel32/tests/locale.c @@ -2681,6 +2681,13 @@ static void test_lcmapstring_unicode(lcmapstring_wrapper func_ptr, const char *f lstrlenW(symbols_stripped) + 1, ret); ok(!lstrcmpW(buf, symbols_stripped), "%s string comparison mismatch\n", func_name);
+ /* test small buffer */ + lstrcpyW(buf, fooW); + ret = func_ptr(LCMAP_SORTKEY, lower_case, -1, buf, 2); + ok(ret == 0, "Expected a failure\n"); + ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER, + "%s unexpected error code %d\n", func_name, GetLastError());; + /* test srclen = 0 */ SetLastError(0xdeadbeef); ret = func_ptr(0, upper_case, 0, buf, ARRAY_SIZE(buf)); @@ -3108,6 +3115,104 @@ static void test_sorting(void) } }
+struct sorting_test_entry { + const WCHAR* locale; + DWORD flags; + const WCHAR* first; + const WCHAR* second; + int result_sortkey; + int result_compare; + BOOL broken_on_old_win; +}; + +static const struct sorting_test_entry unicode_sorting_tests[] = +{ + /* 0 */ { L"en-US", 0, L"\ue6e3\u0a02", L"\ue6e3\u20dc", CSTR_LESS_THAN, 0, TRUE }, /* Test default character, when there is main weight extra there must be no diacritic weight */ + /* 1 */ { L"en-US", 0, L"\u276a", L"\u2768", CSTR_GREATER_THAN }, /* Test symbols, must add diacritic weight */ + /* 2 */ { L"en-US", 0, L"\u204d", L"\uff02", CSTR_LESS_THAN }, /* Test symbols, must add case weight */ + /* 3 */ { L"en-US", 0, L"a \u2060 b", L"a b", CSTR_EQUAL }, /* Test unsortable characters */ + /* 4 */ { L"en-US", 0, L"a \xfff0 b", L"a b", CSTR_EQUAL }, /* Test invalid characters */ + /* 5 */ { L"en-US", 0, L"\x00fc", L"\x016d", CSTR_LESS_THAN }, + /* 6 */ { L"en-US", 0, L"\x3fcb\x7fd5", L"\x0006\x3032", CSTR_GREATER_THAN }, + /* 7 */ { L"en-US", 0, L"\x00fc\x30fd", L"\x00fa\x1833", CSTR_LESS_THAN }, + /* 8 */ { L"en-US", 0, L"\x0037", L"\x277c", CSTR_LESS_THAN, 0, TRUE }, /* Normal character */ + /* 9 */ { L"en-US", 0, L"\x1eca", L"\x1ecb", CSTR_GREATER_THAN }, /* Normal character */ + /* 10 */ { L"en-US", 0, L"\x1d05", L"\x1d48", CSTR_GREATER_THAN }, /* Normal character */ + /* 11 */ { L"en-US", 0, L"\x19d7", L"\x096d", CSTR_GREATER_THAN }, /* Normal character diacritics */ + /* 12 */ { L"en-US", 0, L"\x00f5", L"\x1ecf", CSTR_LESS_THAN }, /* Normal character diacritics */ + /* 13 */ { L"en-US", 0, L"\x2793", L"\x0d70", CSTR_LESS_THAN, 0, TRUE }, /* Normal character diacritics */ + /* 14 */ { L"en-US", 0, L"A", L"a", CSTR_GREATER_THAN }, /* Normal character case weights */ + /* 15 */ { L"en-US", 0, L"z", L"Z", CSTR_LESS_THAN }, /* Normal character case weights */ + /* 16 */ { L"en-US", 0, L"\xe5a6", L"\xe5a5\x0333", CSTR_GREATER_THAN, 0, TRUE }, /* CJK with extra value */ + /* 17 */ { L"en-US", 0, L"\xe5d7", 
L"\xe5d6\x0330", CSTR_GREATER_THAN, 0, TRUE }, /* CJK with extra value */ + /* 18 */ { L"en-US", 0, L"\x1B56\x0330", L"\x1096", CSTR_GREATER_THAN }, /* Diacritic is added */ + /* 19 */ { L"en-US", 0, L"\x1817\x0333", L"\x19d7", CSTR_GREATER_THAN }, /* Diacritic is added */ + /* 20 */ { L"en-US", 0, L"\x04de\x05ac", L"\x0499", CSTR_GREATER_THAN }, /* Diacritic is added */ + /* 21 */ { L"en-US", 0, L"\x01ba\x0654", L"\x01b8", CSTR_LESS_THAN }, /* Diacritic can overflow */ + /* 22 */ { L"en-US", 0, L"\x06b7\x06eb", L"\x06b6", CSTR_LESS_THAN }, /* Diacritic can overflow */ + /* 23 */ { L"en-US", 0, L"\x1420\x0333", L"\x141f", CSTR_LESS_THAN }, /* Diacritic can overflow */ + /* 24 */ { L"en-US", 0, L"\x11bc", L"\x110b", CSTR_GREATER_THAN }, /* Jamo case weight */ + /* 25 */ { L"en-US", 0, L"\x11c1", L"\x1111", CSTR_GREATER_THAN }, /* Jamo case weight */ + /* 26 */ { L"en-US", 0, L"\x11af", L"\x1105", CSTR_GREATER_THAN }, /* Jamo case weight */ + /* 27 */ { L"en-US", 0, L"\x11c2", L"\x11f5", CSTR_LESS_THAN }, /* Jamo main weight */ + /* 28 */ { L"en-US", 0, L"\x1108", L"\x1121", CSTR_LESS_THAN }, /* Jamo main weight */ + /* 29 */ { L"en-US", 0, L"\x1116", L"\x11c7", CSTR_LESS_THAN }, /* Jamo main weight */ + /* 30 */ { L"en-US", 0, L"\x11b1", L"\x11d1", CSTR_LESS_THAN }, /* Jamo main weight */ + /* 31 */ { L"en-US", 0, L"\x4550\x73d2", L"\x3211\x23ad", CSTR_GREATER_THAN }, /* Script 5 main weight 1 */ + /* 32 */ { L"en-US", 0, L"\x3265", L"\x4079", CSTR_LESS_THAN }, /* Script 5 main weight 1 */ + /* 33 */ { L"en-US", 0, L"\x4c19\x68d0\x52d0", L"\x316d", CSTR_GREATER_THAN }, /* Script 5 main weight 1 */ + /* 34 */ { L"en-US", 0, L"\x72dd", L"\x6b8a", CSTR_GREATER_THAN }, /* Script 5 main weight 2 */ + /* 35 */ { L"en-US", 0, L"\x6785\x3bff\x6f83", L"\x7550\x34c9\x71a7", CSTR_LESS_THAN }, /* Script 5 main weight 2 */ + /* 36 */ { L"en-US", 0, L"\x5d61", L"\x3aef", CSTR_LESS_THAN }, /* Script 5 main weight 2 */ + /* 37 */ { L"en-US", 0, L"\x207a", L"\xfe62", 
CSTR_GREATER_THAN }, /* Symbols case weights */ + /* 38 */ { L"en-US", 0, L"\xfe65", L"\xff1e", CSTR_GREATER_THAN }, /* Symbols case weights */ + /* 39 */ { L"en-US", 0, L"\x2502", L"\xffe8", CSTR_GREATER_THAN }, /* Symbols case weights */ + /* 40 */ { L"en-US", 0, L"\x21da", L"\x21dc", CSTR_LESS_THAN }, /* Symbols diacritic weights */ + /* 41 */ { L"en-US", 0, L"\x29fb", L"\x2295", CSTR_LESS_THAN }, /* Symbols diacritic weights */ + /* 42 */ { L"en-US", 0, L"\x0092", L"\x009c", CSTR_LESS_THAN }, /* Symbols diacritic weights */ + /* 43 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x21da", L"\x21dc", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */ + /* 44 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x29fb", L"\x2295", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */ + /* 45 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x0092", L"\x009c", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */ +}; + +static void test_unicode_sorting(void) +{ + int i; + if (!pLCMapStringEx) + { + win_skip("LCMapStringEx not available\n"); + return; + } + for (i = 0; i < ARRAY_SIZE(unicode_sorting_tests); i++) + { + int pos; + BYTE buff1[1000]; + BYTE buff2[1000]; + int len1, len2; + int result = CSTR_EQUAL; + const struct sorting_test_entry* entry = &unicode_sorting_tests[i]; + + len1 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->first, -1, (WCHAR*)buff1, ARRAY_SIZE(buff1), NULL, NULL, 0); + len2 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->second, -1, (WCHAR*)buff2, ARRAY_SIZE(buff2), NULL, NULL, 0); + + for (pos = 0; pos < len1 && pos < len2; pos++) + { + if (buff1[pos] > buff2[pos]) + { + result = CSTR_GREATER_THAN; + break; + } + else if (buff1[pos] < buff2[pos]) + { + result = CSTR_LESS_THAN; + break; + } + } + + ok (result == entry->result_sortkey || broken(entry->broken_on_old_win), "Test %d - Expected %d, got %d\n", i, entry->result_sortkey, result); + } +} + static void test_FoldStringA(void) { int ret, i, j; @@ -6897,4 +7002,5 @@ START_TEST(locale) test_NLSVersion(); /* this requires 
collation table patch to make it MS compatible */ if (0) test_sorting(); + test_unicode_sorting(); } diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c index 53e4e42da3..af78f76e29 100644 --- a/dlls/kernelbase/locale.c +++ b/dlls/kernelbase/locale.c @@ -2126,127 +2126,6 @@ static int wcstombs_codepage( UINT codepage, DWORD flags, const WCHAR *src, int return wcstombs_sbcs( info, src, srclen, dst, dstlen ); }
- -static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen ) -{ - WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ - int key_len[4]; - char *key_ptr[4]; - const WCHAR *src_save = src; - int srclen_save = srclen; - - key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0; - for (; srclen; srclen--, src++) - { - unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/ - dummy[0] = *src; - if (decomposed_len) - { - for (i = 0; i < decomposed_len; i++) - { - WCHAR wch = dummy[i]; - unsigned int ce; - - if ((flags & NORM_IGNORESYMBOLS) && - (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE))) - continue; - - if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch ); - - ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)]; - if (ce != (unsigned int)-1) - { - if (ce >> 16) key_len[0] += 2; - if ((ce >> 8) & 0xff) key_len[1]++; - if ((ce >> 4) & 0x0f) key_len[2]++; - if (ce & 1) - { - if (wch >> 8) key_len[3]++; - key_len[3]++; - } - } - else - { - key_len[0] += 2; - if (wch >> 8) key_len[0]++; - if (wch & 0xff) key_len[0]++; - } - } - } - } - - if (!dstlen) /* compute length */ - /* 4 * '\1' + key length */ - return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4; - - if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1) - return 0; /* overflow */ - - src = src_save; - srclen = srclen_save; - - key_ptr[0] = dst; - key_ptr[1] = key_ptr[0] + key_len[0] + 1; - key_ptr[2] = key_ptr[1] + key_len[1] + 1; - key_ptr[3] = key_ptr[2] + key_len[2] + 1; - - for (; srclen; srclen--, src++) - { - unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/ - dummy[0] = *src; - if (decomposed_len) - { - for (i = 0; i < decomposed_len; i++) - { - WCHAR wch = dummy[i]; - unsigned int ce; - - if ((flags & NORM_IGNORESYMBOLS) && - (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE))) - continue; - - if (flags & 
NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch ); - - ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)]; - if (ce != (unsigned int)-1) - { - WCHAR key; - if ((key = ce >> 16)) - { - *key_ptr[0]++ = key >> 8; - *key_ptr[0]++ = key & 0xff; - } - /* make key 1 start from 2 */ - if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1; - /* make key 2 start from 2 */ - if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1; - /* key 3 is always a character code */ - if (ce & 1) - { - if (wch >> 8) *key_ptr[3]++ = wch >> 8; - if (wch & 0xff) *key_ptr[3]++ = wch & 0xff; - } - } - else - { - *key_ptr[0]++ = 0xff; - *key_ptr[0]++ = 0xfe; - if (wch >> 8) *key_ptr[0]++ = wch >> 8; - if (wch & 0xff) *key_ptr[0]++ = wch & 0xff; - } - } - } - } - - *key_ptr[0] = 1; - *key_ptr[1] = 1; - *key_ptr[2] = 1; - *key_ptr[3]++ = 1; - *key_ptr[3] = 0; - return key_ptr[3] - dst; -} - - /* compose a full-width katakana. return consumed source characters. */ static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst ) { @@ -2574,6 +2453,347 @@ static int compare_weights(int flags, const WCHAR *str1, int len1, return len1 - len2; }
+/* Start sortkey handler code. */ + +/* Defines */ + +#define JAPANESE 3 +#define MIN_WEIGHT 2 +#define LIST_STACK_BUFFER 1000 + +/* Internal structures */ + +typedef struct _character_info +{ + BYTE weight_primary; + BYTE script_member; + BYTE weight_diacritic; + BYTE weight_case; +} character_info; + +typedef struct _weight_main_info +{ + BYTE script_member; + BYTE weight_primary; + BYTE extra; +} weight_main_info; + +typedef struct _list +{ + int extra_len; + int len; + BYTE buffer[LIST_STACK_BUFFER]; + int buffer_count; + BYTE* extra; + int element_size; +} list; + +typedef struct _sortkey_data +{ + int flags; + list key; + list weights_main; + list weights_diacritic; + list weights_case; +} sortkey_data; + +/* List functions */ + +#define LIST_INIT(name, type) \ + name.extra_len = 0; \ + name.len = 0; \ + name.extra = 0; \ + name.buffer_count = LIST_STACK_BUFFER / sizeof(type); \ + name.element_size = sizeof(type); + +#define LIST_DESTROY(name) \ + RtlFreeHeap(GetProcessHeap(), 0, name.extra); + + +#define LIST_GET(name, type, index) \ + (type*) (((index + 1) * name.element_size <= LIST_STACK_BUFFER) ? 
\ + &((type*)name.buffer)[index] : \ + &((type*)name.extra)[index - name.buffer_count]) \ + +/* Add entry to list, resizing as needed */ +#define LIST_ADD(name, type, value) \ + do { \ + if ((name.len + 1) * name.element_size > name.extra_len + LIST_STACK_BUFFER) { \ + if (!name.extra) { /* First allocation */ \ + name.extra_len = LIST_STACK_BUFFER; \ + name.extra = RtlAllocateHeap(GetProcessHeap(), 0, name.extra_len); \ + } else { \ + name.extra_len *= 2; \ + name.extra = RtlReAllocateHeap(GetProcessHeap(), 0,name.extra, name.extra_len); \ + } \ + } \ + *LIST_GET(name, type, name.len) = value; \ + name.len++; \ + } while (0); + + +/* Append a weight list to the sortkey */ +#define APPEND_LIST_TO_SORTKEY(data, weights, type, statement_get_value, statement_is_ignored) \ + do { \ + int z; \ + int end = data->weights.len - 1; \ + while (end >= 0) \ + { \ + const type* element = LIST_GET(data->weights, type, end); \ + (void)element; \ + if (!(statement_is_ignored)) break; \ + end--; \ + } \ + for (z = 0; z <= end; z++) \ + { \ + const type* element = LIST_GET(data->weights, type, z); \ + LIST_ADD(data->key, BYTE, statement_get_value); \ + } \ + } while (0); + +/* Helper functions */ + +static BOOL get_char(sortkey_data* data, character_info* info, WCHAR ch) +{ + DWORD value = sort.keys[ch]; + + info->weight_case = value >> 24; + info->weight_diacritic = (value >> 16) & 0xff; + info->script_member = (value >> 8) & 0xff; + info->weight_primary = value & 0xff; + return info->script_member != 0; +} + +static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale, BOOL is_compare_string) +{ + data->flags = flags; + LIST_INIT(data->key, BYTE); + LIST_INIT(data->weights_main, BYTE); + LIST_INIT(data->weights_diacritic, BYTE); + LIST_INIT(data->weights_case, BYTE); +} + +static void sortkey_data_destroy(sortkey_data* data) +{ + LIST_DESTROY(data->key); + LIST_DESTROY(data->weights_main); + LIST_DESTROY(data->weights_diacritic); + 
LIST_DESTROY(data->weights_case); +} + +static weight_main_info create_weight_main(BYTE script_member, BYTE weight_primary) +{ + weight_main_info ret = { 0 }; + ret.script_member = script_member; + ret.weight_primary = weight_primary; + return ret; +} + +static void case_weights_add(sortkey_data* data, BYTE value) +{ + int flags = data->flags; + if (NORM_IGNORECASE & flags) + value = value & ~(16 + 8); + if (NORM_IGNOREWIDTH & flags) + value = value & ~(1); + if (NORM_IGNOREKANATYPE & flags) + value = value & ~(32); + + LIST_ADD(data->weights_case, BYTE, value); +} + +static void main_weights_add(sortkey_data *data, weight_main_info* value) +{ + LIST_ADD(data->weights_main, BYTE, value->script_member); + LIST_ADD(data->weights_main, BYTE, value->weight_primary); + if (value->extra > 0) + LIST_ADD(data->weights_main, BYTE, value->extra); +} + +static void diacritic_weights_add(sortkey_data* data, const character_info* info, BYTE value) +{ + LIST_ADD(data->weights_diacritic, BYTE, value); +} + +/* Main sortkey logic */ + +static void sortkey_handle_default_character(sortkey_data* data, WCHAR c) +{ + weight_main_info weightmain; + character_info info; + + if (!get_char(data, &info, c)) + { + return; + } + + weightmain = create_weight_main(info.script_member, info.weight_primary); + if (info.script_member >= 0xa9 && info.script_member <= 0xaf) /* Some CJK have extra value */ + weightmain.extra = info.weight_diacritic; + else + diacritic_weights_add(data, &info, info.weight_diacritic); + + main_weights_add(data, &weightmain); + + case_weights_add(data, info.weight_case); +} + +static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* str, int i) +{ + weight_main_info weightmain; + character_info info; + int flags = data->flags; + + if (!get_char(data, &info, c)) + { + return FALSE; + } + + switch (info.script_member) + { + case 0: /* Not sorted */ + break; + + case 1: + if (data->weights_diacritic.len > 0) + { + BYTE* entry = 
LIST_GET(data->weights_diacritic, BYTE, data->weights_diacritic.len - 1); + *entry += info.weight_diacritic; /* Overflow can happen, that's okay */ + } + else + diacritic_weights_add(data, &info, info.weight_diacritic); + break; + + case JAPANESE: + /* TODO */ + break; + + case 4: /* Jamo */ + weightmain = create_weight_main(info.weight_primary, info.weight_diacritic); + main_weights_add(data, &weightmain); + + diacritic_weights_add(data, &info, MIN_WEIGHT); + + case_weights_add(data, info.weight_case); + break; + + case 5: + weightmain = create_weight_main(253, 255); + main_weights_add(data, &weightmain); + + weightmain = create_weight_main(info.weight_primary, info.weight_diacritic); + main_weights_add(data, &weightmain); + + diacritic_weights_add(data, &info, MIN_WEIGHT); + + case_weights_add(data, MIN_WEIGHT); + break; + + case 6: /* Punctuation */ + /* TODO */ + break; + + case 7: /* Symbols */ + case 8: /* Symbols */ + case 9: /* Symbols */ + case 10: /* Symbols */ + case 11: /* Symbols */ + case 12: /* Symbols */ + if (flags & NORM_IGNORESYMBOLS) + break; + + weightmain = create_weight_main(info.script_member, info.weight_primary); + main_weights_add(data, &weightmain); + + diacritic_weights_add(data, &info, info.weight_diacritic); + + case_weights_add(data, info.weight_case); + break; + + default: + sortkey_handle_default_character(data, c); + break; + } + return TRUE; +} + +static void sortkey_write_result(sortkey_data* data) +{ + int flags = data->flags; + + const BYTE SORTKEY_SEPARATOR = 1; + const BYTE SORTKEY_TERMINATOR = 0; + + /* Main weights */ + + APPEND_LIST_TO_SORTKEY(data, weights_main, BYTE, *element, FALSE); + + LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR); + + /* Diacritic weights */ + + if ((flags & NORM_IGNORENONSPACE) == 0) + { + APPEND_LIST_TO_SORTKEY(data, weights_diacritic, BYTE, *element, *element <= MIN_WEIGHT); + } + + LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR); + + /* Case weights */ + if ((NORM_IGNORECASE & flags) == 0 || 
(NORM_IGNOREWIDTH & flags) == 0) + { + APPEND_LIST_TO_SORTKEY(data, weights_case, BYTE, *element, *element <= MIN_WEIGHT); + } + + LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR); + + /* Extra weights */ + /* TODO */ + + LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR); + + /* Special weights */ + /* TODO */ + + LIST_ADD(data->key, BYTE, SORTKEY_TERMINATOR); +} + +static int sortkey_generate(int flags, const WCHAR* locale, const WCHAR* str, int str_len, BYTE* buffer, int buffer_len) +{ + int i; + sortkey_data data; + int ret = 0; + + sortkey_data_init(&data, flags, locale, FALSE); + + if (str_len == -1) + str_len = wcslen(str); + + for (i = 0; i < str_len; i++) + { + sortkey_handle_character(&data, str[i], str, i); + } + + sortkey_write_result(&data); + + if (data.key.len <= buffer_len) + { + for (i = 0; i < data.key.len; i++) + { + BYTE* value = LIST_GET(data.key, BYTE, i); + buffer[i] = *value; + } + ret = data.key.len; + } + else if (!buffer) + { + ret = data.key.len; + } + sortkey_data_destroy(&data); + return ret; +} + +/* End sortkey handler code */
static const struct geoinfo *get_geoinfo_ptr( GEOID geoid ) { @@ -4964,8 +5184,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co TRACE( "(%s,0x%08x,%s,%d,%p,%d)\n", debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
- if ((ret = get_sortkey( flags, src, srclen, (char *)dst, dstlen ))) ret++; - else SetLastError( ERROR_INSUFFICIENT_BUFFER ); + if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen ))) + SetLastError( ERROR_INSUFFICIENT_BUFFER ); return ret; }
-- 2.26.0
--- dlls/kernel32/tests/locale.c | 17 ++++++++++++++++ dlls/kernelbase/locale.c | 39 ++++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c index a451594b19..4b0c106cd2 100644 --- a/dlls/kernel32/tests/locale.c +++ b/dlls/kernel32/tests/locale.c @@ -3173,6 +3173,23 @@ static const struct sorting_test_entry unicode_sorting_tests[] = /* 43 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x21da", L"\x21dc", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */ /* 44 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x29fb", L"\x2295", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */ /* 45 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x0092", L"\x009c", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */ + /* 46 */ { L"en-US", 0, L"\x001b", L"\x001c", CSTR_LESS_THAN }, /* Punctuation primary weight */ + /* 47 */ { L"en-US", 0, L"\x0005", L"\x0006", CSTR_LESS_THAN }, /* Punctuation primary weight */ + /* 48 */ { L"en-US", 0, L"\x0027", L"\xff07", CSTR_LESS_THAN, 0, TRUE }, /* Punctuation diacritic/case weight */ + /* 49 */ { L"en-US", 0, L"\x07f4", L"\x07f5", CSTR_LESS_THAN, 0, TRUE }, /* Punctuation diacritic/case weight */ + /* 50 */ { L"en-US", 0, L"\x207b", L"\x0008", CSTR_GREATER_THAN }, /* Punctuation diacritic/case weight */ + /* 51 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x207b", L"\x0008", CSTR_EQUAL }, /* Punctuation NORM_IGNORESYMBOLS */ + /* 52 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x0004", L"\x0011", CSTR_EQUAL }, /* Punctuation NORM_IGNORESYMBOLS */ + /* 53 */ { L"en-US", NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x207b", L"\x0008", CSTR_EQUAL }, /* Punctuation NORM_IGNORESYMBOLS SORT_STRINGSORT */ + /* 54 */ { L"en-US", NORM_IGNORESYMBOLS | SORT_STRINGSORT, L"\x0004", L"\x0011", CSTR_EQUAL }, /* Punctuation NORM_IGNORESYMBOLS SORT_STRINGSORT */ + /* 55 */ { L"en-US", SORT_STRINGSORT, L"\x001a", L"\x001b", CSTR_LESS_THAN }, /* Punctuation SORT_STRINGSORT main weight */ + /* 56 */ { L"en-US", SORT_STRINGSORT, L"\x2027", L"\x2011", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT main weight */ + /* 57 */ { L"en-US", SORT_STRINGSORT, L"\x3030", L"\x301c", CSTR_GREATER_THAN }, /* Punctuation 
SORT_STRINGSORT main weight */ + /* 58 */ { L"en-US", SORT_STRINGSORT, L"\x058a", L"\x2010", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT diacritic weight */ + /* 59 */ { L"en-US", SORT_STRINGSORT, L"\x07F5", L"\x07F4", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT diacritic weight */ + /* 60 */ { L"en-US", SORT_STRINGSORT, L"\xfe32", L"\x2013", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */ + /* 61 */ { L"en-US", SORT_STRINGSORT, L"\xfe31", L"\xfe58", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */ + /* 62 */ { L"en-US", SORT_STRINGSORT, L"\xff07", L"\x0027", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */ };
static void test_unicode_sorting(void) diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c index af78f76e29..242caf6764 100644 --- a/dlls/kernelbase/locale.c +++ b/dlls/kernelbase/locale.c @@ -2478,6 +2478,12 @@ typedef struct _weight_main_info BYTE extra; } weight_main_info;
+typedef struct _weight_special_info +{ + BYTE script_member; + BYTE weight_primary; +} weight_special_info; + typedef struct _list { int extra_len; @@ -2495,6 +2501,7 @@ typedef struct _sortkey_data list weights_main; list weights_diacritic; list weights_case; + list weights_special; } sortkey_data;
/* List functions */ @@ -2571,6 +2578,7 @@ static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale LIST_INIT(data->weights_main, BYTE); LIST_INIT(data->weights_diacritic, BYTE); LIST_INIT(data->weights_case, BYTE); + LIST_INIT(data->weights_special, BYTE); }
static void sortkey_data_destroy(sortkey_data* data) @@ -2579,6 +2587,7 @@ static void sortkey_data_destroy(sortkey_data* data) LIST_DESTROY(data->weights_main); LIST_DESTROY(data->weights_diacritic); LIST_DESTROY(data->weights_case); + LIST_DESTROY(data->weights_special); }
static weight_main_info create_weight_main(BYTE script_member, BYTE weight_primary) @@ -2610,6 +2619,12 @@ static void main_weights_add(sortkey_data *data, weight_main_info* value) LIST_ADD(data->weights_main, BYTE, value->extra); }
+static void special_weights_add(sortkey_data* data, weight_special_info* value) +{ + LIST_ADD(data->weights_special, BYTE, value->script_member); + LIST_ADD(data->weights_special, BYTE, value->weight_primary); +} + static void diacritic_weights_add(sortkey_data* data, const character_info* info, BYTE value) { LIST_ADD(data->weights_diacritic, BYTE, value); @@ -2690,7 +2705,26 @@ static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* s break;
case 6: /* Punctuation */ - /* TODO */ + if (flags & NORM_IGNORESYMBOLS) + break; + + if (flags & SORT_STRINGSORT) + { + weightmain = create_weight_main(info.script_member, info.weight_primary); + main_weights_add(data, &weightmain); + + diacritic_weights_add(data, &info, info.weight_diacritic); + + case_weights_add(data, info.weight_case); + } + else + { + weight_special_info special; + + special.script_member = info.weight_primary; + special.weight_primary = (BYTE)(info.weight_diacritic * 8 + info.weight_case); /* Logic found through testing, seems to work reliably */ + special_weights_add(data, &special); + } break;
case 7: /* Symbols */ @@ -2753,7 +2787,8 @@ static void sortkey_write_result(sortkey_data* data) LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR);
/* Special weights */ - /* TODO */ + + APPEND_LIST_TO_SORTKEY(data, weights_special, BYTE, *element, FALSE);
LIST_ADD(data->key, BYTE, SORTKEY_TERMINATOR); } -- 2.26.0
--- dlls/kernel32/tests/locale.c | 26 +++++++++++++++ dlls/kernelbase/locale.c | 65 ++++++++++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c index 4b0c106cd2..01ba3c0cb6 100644 --- a/dlls/kernel32/tests/locale.c +++ b/dlls/kernel32/tests/locale.c @@ -3190,6 +3190,32 @@ static const struct sorting_test_entry unicode_sorting_tests[] = /* 60 */ { L"en-US", SORT_STRINGSORT, L"\xfe32", L"\x2013", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */ /* 61 */ { L"en-US", SORT_STRINGSORT, L"\xfe31", L"\xfe58", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */ /* 62 */ { L"en-US", SORT_STRINGSORT, L"\xff07", L"\x0027", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */ + /* 63 */ { L"en-US", 0, L"\x04b0", L"\x32db", CSTR_LESS_THAN }, /* Japanese main weight */ + /* 64 */ { L"en-US", 0, L"\x3093", L"\x1e62\x013f", CSTR_GREATER_THAN }, /* japanese main weight */ + /* 65 */ { L"en-US", 0, L"\x30d3", L"\x30d4", CSTR_LESS_THAN }, /* japanese diacritic weight */ + /* 66 */ { L"en-US", 0, L"\x307b", L"\x307c", CSTR_LESS_THAN }, /* japanese diacritic weight */ + /* 67 */ { L"en-US", 0, L"\x30ea", L"\x32f7", CSTR_LESS_THAN }, /* japanese diacritic weight */ + /* 68 */ { L"en-US", 0, L"\x31fb", L"\x30e9", CSTR_LESS_THAN }, /* japanese case weight small */ + /* 69 */ { L"en-US", 0, L"\x30db", L"\x31f9", CSTR_GREATER_THAN }, /* japanese case weight small */ + /* 70 */ { L"en-US", 0, L"\xff6d", L"\xff95", CSTR_LESS_THAN }, /* japanese case weight small */ + /* 71 */ { L"en-US", NORM_IGNORENONSPACE, L"\x31fb", L"\x30e9", CSTR_EQUAL }, /* japanese case weight small */ + /* 72 */ { L"en-US", NORM_IGNORENONSPACE, L"\x30db", L"\x31f9", CSTR_EQUAL }, /* japanese case weight small */ + /* 73 */ { L"en-US", NORM_IGNORENONSPACE, L"\xff6d", L"\xff95", CSTR_EQUAL }, /* japanese case weight small */ + /* 74 */ { L"en-US", 0, L"\x30d5", L"\x3075", CSTR_LESS_THAN }, /* japanese case weight kana */ + /* 75 */ { L"en-US", 0, L"\x306a", L"\x30ca", CSTR_GREATER_THAN }, /* japanese case weight kana */ + /* 76 */ { L"en-US", 
0, L"\x305a", L"\x30ba", CSTR_GREATER_THAN }, /* japanese case weight kana */ + /* 77 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x30d5", L"\x3075", CSTR_EQUAL }, /* japanese case weight kana */ + /* 78 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x306a", L"\x30ca", CSTR_EQUAL }, /* japanese case weight kana */ + /* 79 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x305a", L"\x30ba", CSTR_EQUAL }, /* japanese case weight kana */ + /* 80 */ { L"en-US", 0, L"\x30bf", L"\xff80", CSTR_GREATER_THAN }, /* japanese case weight width */ + /* 81 */ { L"en-US", 0, L"\x30ab", L"\xff76", CSTR_GREATER_THAN }, /* japanese case weight width */ + /* 82 */ { L"en-US", 0, L"\x30a2", L"\xff71", CSTR_GREATER_THAN }, /* japanese case weight width */ + /* 83 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30bf", L"\xff80", CSTR_EQUAL }, /* japanese case weight width */ + /* 84 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30ab", L"\xff76", CSTR_EQUAL }, /* japanese case weight width */ + /* 85 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30a2", L"\xff71", CSTR_EQUAL }, /* japanese case weight width */ + /* 86 */ { L"en-US", NORM_IGNORENONSPACE, L"\x31a2", L"\x3110", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */ + /* 87 */ { L"en-US", NORM_IGNORENONSPACE, L"\x1342", L"\x133a", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */ + /* 88 */ { L"en-US", NORM_IGNORENONSPACE, L"\x16a4", L"\x16a5", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */ };
static void test_unicode_sorting(void) diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c index 242caf6764..789e90cbf5 100644 --- a/dlls/kernelbase/locale.c +++ b/dlls/kernelbase/locale.c @@ -2484,6 +2484,13 @@ typedef struct _weight_special_info BYTE weight_primary; } weight_special_info;
+typedef struct _weight_extra_info +{ + BYTE flag_small; + BYTE flag_kana; + BYTE flag_width; +} weight_extra_info; + typedef struct _list { int extra_len; @@ -2502,6 +2509,7 @@ typedef struct _sortkey_data list weights_diacritic; list weights_case; list weights_special; + list weights_extra; } sortkey_data;
/* List functions */ @@ -2579,6 +2587,7 @@ static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale LIST_INIT(data->weights_diacritic, BYTE); LIST_INIT(data->weights_case, BYTE); LIST_INIT(data->weights_special, BYTE); + LIST_INIT(data->weights_extra, weight_extra_info); }
static void sortkey_data_destroy(sortkey_data* data) @@ -2588,6 +2597,7 @@ static void sortkey_data_destroy(sortkey_data* data) LIST_DESTROY(data->weights_diacritic); LIST_DESTROY(data->weights_case); LIST_DESTROY(data->weights_special); + LIST_DESTROY(data->weights_extra); }
static weight_main_info create_weight_main(BYTE script_member, BYTE weight_primary) @@ -2653,6 +2663,36 @@ static void sortkey_handle_default_character(sortkey_data* data, WCHAR c) case_weights_add(data, info.weight_case); }
+static void sortkey_handle_japanese_character(sortkey_data* data, weight_main_info* weightmain, const character_info* info, const character_info* info_other) +{ + const BYTE BASELINE_EXTRA = 0xc4; + const BYTE ISOLATE_KANA = 0x20 | BASELINE_EXTRA; /* if bit is set then hiragana, else katakana */ + const BYTE ISOLATE_SMALL = 0x2 | BASELINE_EXTRA; /* if bit is set then normal kana, else small kana */ + const BYTE ISOLATE_WIDTH = 0x1 | BASELINE_EXTRA; /* if bit is set then full width, else half width */ + int weight_case; + weight_extra_info extra; + + weightmain->script_member = 34; + weightmain->weight_primary = info_other->weight_primary; + + main_weights_add(data, weightmain); + + weight_case = info_other->weight_case | BASELINE_EXTRA; + + extra.flag_small = (BYTE)(weight_case & ISOLATE_SMALL); + extra.flag_kana = (BYTE)(weight_case & ISOLATE_KANA); + extra.flag_width = (BYTE)(weight_case & ISOLATE_WIDTH); + + if (data->flags & NORM_IGNOREKANATYPE) + extra.flag_kana = BASELINE_EXTRA; + if (data->flags & NORM_IGNOREWIDTH) + extra.flag_width = BASELINE_EXTRA; + LIST_ADD(data->weights_extra, weight_extra_info, extra); + + diacritic_weights_add(data, info, info->weight_diacritic); + case_weights_add(data, MIN_WEIGHT); +} + static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* str, int i) { weight_main_info weightmain; @@ -2680,7 +2720,16 @@ static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* s break;
case JAPANESE: - /* TODO */ + weightmain = create_weight_main(info.script_member, info.weight_primary); + + if (weightmain.weight_primary <= 1) + { + /* TODO */ + } + else + { + sortkey_handle_japanese_character(data, &weightmain, &info, &info); + } break;
case 4: /* Jamo */ @@ -2782,7 +2831,19 @@ static void sortkey_write_result(sortkey_data* data) LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR);
/* Extra weights */ - /* TODO */ + if (data->weights_extra.len > 0) + { + if ((NORM_IGNORENONSPACE & flags) == 0) + { + APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_small, element->flag_small > 196); + } + LIST_ADD(data->key, BYTE, 0xff); + LIST_ADD(data->key, BYTE, 0x02); + APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_kana, element->flag_kana > 196); + LIST_ADD(data->key, BYTE, 0xff); + APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_width, element->flag_width > 196); + LIST_ADD(data->key, BYTE, 0xff); + }
LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR);
-- 2.26.0
--- dlls/kernel32/tests/locale.c | 3 +++ dlls/kernelbase/locale.c | 45 +++++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c index 01ba3c0cb6..6437f043f7 100644 --- a/dlls/kernel32/tests/locale.c +++ b/dlls/kernel32/tests/locale.c @@ -3216,6 +3216,9 @@ static const struct sorting_test_entry unicode_sorting_tests[] = /* 86 */ { L"en-US", NORM_IGNORENONSPACE, L"\x31a2", L"\x3110", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */ /* 87 */ { L"en-US", NORM_IGNORENONSPACE, L"\x1342", L"\x133a", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */ /* 88 */ { L"en-US", NORM_IGNORENONSPACE, L"\x16a4", L"\x16a5", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */ + /* 89 */ { L"en-US", 0, L"\x00c6", L"\x0041\x0045", CSTR_EQUAL }, /* Expansion */ + /* 90 */ { L"en-US", 0, L"\x0f5c", L"\x0f5b\x0fb7", CSTR_EQUAL }, /* Expansion */ + /* 91 */ { L"en-US", 0, L"\x05f0", L"\x05d5\x05d5", CSTR_EQUAL }, /* Expansion */ };
static void test_unicode_sorting(void) diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c index 789e90cbf5..a02d1bf855 100644 --- a/dlls/kernelbase/locale.c +++ b/dlls/kernelbase/locale.c @@ -2471,6 +2471,13 @@ typedef struct _character_info BYTE weight_case; } character_info;
+typedef struct _character_info_expansion +{ + int character; + int character_result1; + int character_result2; +} character_info_expansion; + typedef struct _weight_main_info { BYTE script_member; @@ -2571,6 +2578,8 @@ typedef struct _sortkey_data static BOOL get_char(sortkey_data* data, character_info* info, WCHAR ch) { DWORD value = sort.keys[ch]; + if ((WORD)value == 0x200) /* Expansion */ + return FALSE;
info->weight_case = value >> 24; info->weight_diacritic = (value >> 16) & 0xff; @@ -2579,6 +2588,28 @@ static BOOL get_char(sortkey_data* data, character_info* info, WCHAR ch) return info->script_member != 0; }
+static BOOL get_expansion(character_info_expansion* info, WCHAR ch) +{ + DWORD pos_info = sort.keys[ch]; + int count = (WORD)pos_info; + int pos = pos_info >> 16; + const DWORD* ptr; + const WCHAR* p; + int count_expansion; + if (count != 0x200) + return FALSE; + ptr = (const DWORD *)(sort.guids + sort.guid_count); + count_expansion = *ptr++; + if (pos >= count_expansion) + return FALSE; + p = (const WCHAR *)(ptr + pos); + info->character = ch; + info->character_result1 = p[0]; + info->character_result2 = p[1]; + return TRUE; +} + + static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale, BOOL is_compare_string) { data->flags = flags; @@ -2642,14 +2673,21 @@ static void diacritic_weights_add(sortkey_data* data, const character_info* info
/* Main sortkey logic */
-static void sortkey_handle_default_character(sortkey_data* data, WCHAR c) +static BOOL sortkey_handle_default_character(sortkey_data* data, WCHAR c) { weight_main_info weightmain; character_info info; + character_info_expansion expansion;
if (!get_char(data, &info, c)) { - return; + if (get_expansion(&expansion, c)) + { + sortkey_handle_default_character(data, (WCHAR)expansion.character_result1); + sortkey_handle_default_character(data, (WCHAR)expansion.character_result2); + return TRUE; + } + return FALSE; }
weightmain = create_weight_main(info.script_member, info.weight_primary); @@ -2661,6 +2699,7 @@ static void sortkey_handle_default_character(sortkey_data* data, WCHAR c) main_weights_add(data, &weightmain);
case_weights_add(data, info.weight_case); + return TRUE; }
static void sortkey_handle_japanese_character(sortkey_data* data, weight_main_info* weightmain, const character_info* info, const character_info* info_other) @@ -2701,7 +2740,7 @@ static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* s
if (!get_char(data, &info, c)) { - return FALSE; + return sortkey_handle_default_character(data, c); }
switch (info.script_member) -- 2.26.0
--- dlls/kernel32/tests/locale.c | 20 ++++++++++++++++++++ dlls/kernelbase/locale.c | 26 ++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c index 6437f043f7..1aa32055bb 100644 --- a/dlls/kernel32/tests/locale.c +++ b/dlls/kernel32/tests/locale.c @@ -3219,6 +3219,26 @@ static const struct sorting_test_entry unicode_sorting_tests[] = /* 89 */ { L"en-US", 0, L"\x00c6", L"\x0041\x0045", CSTR_EQUAL }, /* Expansion */ /* 90 */ { L"en-US", 0, L"\x0f5c", L"\x0f5b\x0fb7", CSTR_EQUAL }, /* Expansion */ /* 91 */ { L"en-US", 0, L"\x05f0", L"\x05d5\x05d5", CSTR_EQUAL }, /* Expansion */ + /* 92 */ { L"ja-JP", 0, L"\x6df8", L"\x654b\x29e9", CSTR_LESS_THAN }, /* japanese locale */ + /* 93 */ { L"ja-JP", 0, L"\x685d\x1239\x1b61", L"\x59b6\x6542\x2a62\x04a7", CSTR_LESS_THAN }, /* japanese locale */ + /* 94 */ { L"ja-JP", 0, L"\x62f3\x43e9", L"\x5760", CSTR_LESS_THAN }, /* japanese locale */ + /* 95 */ { L"ja-JP", 0, L"\x634c", L"\x2f0d\x5f1c\x7124", CSTR_LESS_THAN }, /* japanese locale */ + /* 96 */ { L"ja-JP", 0, L"\x69e7\x0502", L"\x57cc" , CSTR_LESS_THAN }, /* japanese locale */ + /* 97 */ { L"ja-JP", 0, L"\x7589", L"\x67c5" , CSTR_LESS_THAN }, /* japanese locale */ + /* 98 */ { L"ja-JP", 0, L"\x5ede\x765c", L"\x7324" , CSTR_GREATER_THAN }, /* japanese locale */ + /* 99 */ { L"ja-JP", 0, L"\x5c7f\x5961", L"\x7cbe" , CSTR_GREATER_THAN }, /* japanese locale */ + /* 100 */ { L"ja-JP", 0, L"\x3162", L"\x6a84\x1549\x0b60" , CSTR_GREATER_THAN }, /* japanese locale */ + /* 101 */ { L"ja-JP", 0, L"\x769e\x448e", L"\x4e6e" , CSTR_LESS_THAN }, /* japanese locale */ + /* 102 */ { L"ja-JP", 0, L"\x59a4", L"\x5faa\x607c", CSTR_GREATER_THAN }, /* japanese locale */ + /* 103 */ { L"ja-JP", 0, L"\x529b", L"\x733f", CSTR_GREATER_THAN }, /* japanese locale */ + /* 104 */ { L"ja-JP", 0, L"\x6ff8\x2a0a", L"\x7953\x6712" , CSTR_GREATER_THAN }, /* japanese locale */ + /* 105 */ { L"ja-JP", 0, L"\x6dfb", L"\x6793", CSTR_LESS_THAN }, /* japanese locale */ + /* 106 */ { L"ja-JP", 0, L"\x67ed", L"\x6aa2", CSTR_GREATER_THAN }, /* japanese locale */ + /* 107 */ { L"ja-JP", 
0, L"\x4e61", L"\x6350\x6b08", CSTR_GREATER_THAN }, /* japanese locale */ + /* 108 */ { L"ja-JP", 0, L"\x5118", L"\x53b3\x75b4", CSTR_GREATER_THAN }, /* japanese locale */ + /* 109 */ { L"ja-JP", 0, L"\x6bbf", L"\x65a3" , CSTR_LESS_THAN }, /* japanese locale */ + /* 110 */ { L"ja-JP", 0, L"\x5690", L"\x5fa8", CSTR_GREATER_THAN }, /* japanese locale */ + /* 111 */ { L"ja-JP", 0, L"\x61e2", L"\x76e5" , CSTR_GREATER_THAN }, /* japanese locale */ };
static void test_unicode_sorting(void) diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c index a02d1bf855..d74a496217 100644 --- a/dlls/kernelbase/locale.c +++ b/dlls/kernelbase/locale.c @@ -2517,6 +2517,7 @@ typedef struct _sortkey_data list weights_case; list weights_special; list weights_extra; + const struct sortguid* locale; } sortkey_data;
/* List functions */ @@ -2575,9 +2576,28 @@ typedef struct _sortkey_data
/* Helper functions */
+static DWORD get_exception(sortkey_data* data, WCHAR ch) +{ + if (data->locale && data->locale->except) + { + DWORD* table = sort.keys + data->locale->except; + DWORD hi = ch >> 8; + DWORD lo = ch & 0xff; + if (table[hi] == hi * 0x100) + return 0; + if (sort.keys[table[hi] + lo] == sort.keys[hi * 0x100 + lo]) + return 0; + return sort.keys[table[hi] + lo]; + } + return 0; +} + static BOOL get_char(sortkey_data* data, character_info* info, WCHAR ch) { - DWORD value = sort.keys[ch]; + DWORD value = get_exception(data, ch); + if (!value) + value = sort.keys[ch]; + if ((WORD)value == 0x200) /* Expansion */ return FALSE;
@@ -2585,6 +2605,7 @@ static BOOL get_char(sortkey_data* data, character_info* info, WCHAR ch) info->weight_diacritic = (value >> 16) & 0xff; info->script_member = (value >> 8) & 0xff; info->weight_primary = value & 0xff; + return info->script_member != 0; }
@@ -2619,6 +2640,7 @@ static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale LIST_INIT(data->weights_case, BYTE); LIST_INIT(data->weights_special, BYTE); LIST_INIT(data->weights_extra, weight_extra_info); + data->locale = get_language_sort(locale); }
static void sortkey_data_destroy(sortkey_data* data) @@ -5319,7 +5341,7 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co TRACE( "(%s,0x%08x,%s,%d,%p,%d)\n", debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
- if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen ))) + if (!(ret = sortkey_generate(flags, locale, src, srclen, (BYTE *)dst, dstlen ))) SetLastError( ERROR_INSUFFICIENT_BUFFER ); return ret; } -- 2.26.0
Hello wine-devel,
This is my new patchset to implement string comparison on top of the official sorting weight tables from Windows. While not necessarily finished, I'd like a review of what I should change before I continue.
This time I started with reimplementing sortkey generation, since it's both cleaner and less intrusive to the codebase - a lot more programs use CompareStringX than use direct sortkey generation. After that implementation is finished, I plan to build CompareStringX on top of it. CompareStringEx "should" yield the same result as a sortkey comparison, but it does not. That is because it cuts corners for performance, which is why I focused on the accurate sortkeys first.
A few major changes compared to my last patchset: - Now with more tests that test different edge cases in sorting - A list implementation that tries to go without allocating memory all the time (Not sure if that's the best way to go about that though) - Only sortkey generation altered - Using Win10 NLS data from Wine
Please note that not all features of a full sortkey generation are implemented, like compression, surrogates, multicharacter sequences, and so on. If you want me to include that for review, please tell me.
Also note that all tests are handcrafted, since I don't know of a way to systematically test string sorting - there are just too many combinations. If you know of a way, please tell me.
Looking forward to your comments!
Regards, Fabian Maurer