New subject: [PATCH 2/5] kernelbase/locale: Implement sortkey punctuation

28 Apr 2020

Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
---
 dlls/kernel32/tests/locale.c | 110 ++++++++
 dlls/kernelbase/locale.c     | 477 ++++++++++++++++++++++++++---------
 2 files changed, 464 insertions(+), 123 deletions(-)

diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 4c1e1b4d73..13839bb10a 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -2681,6 +2681,13 @@ static void test_lcmapstring_unicode(lcmapstring_wrapper func_ptr, const char *f
     lstrlenW(symbols_stripped) + 1, ret);
     ok(!lstrcmpW(buf, symbols_stripped), "%s string comparison mismatch\n", func_name);
+    /* test small buffer */
+    lstrcpyW(buf, fooW);
+    ret = func_ptr(LCMAP_SORTKEY, lower_case, -1, buf, 2);
+    ok(ret == 0, "Expected a failure\n");
+    ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER,
+           "%s unexpected error code %d\n", func_name, GetLastError());;
+
     /* test srclen = 0 */
     SetLastError(0xdeadbeef);
     ret = func_ptr(0, upper_case, 0, buf, ARRAY_SIZE(buf));
@@ -3108,6 +3115,108 @@ static void test_sorting(void)
     }
 }
+/* One ordering test case: two strings whose generated sort keys are compared. */
+struct sorting_test_entry {
+    const WCHAR* locale;        /* locale name handed to LCMapStringEx */
+    DWORD flags;                /* extra flags, ORed with LCMAP_SORTKEY */
+    const WCHAR* first;         /* first source string */
+    const WCHAR* second;        /* second source string */
+    int result_sortkey;         /* expected CSTR_* result of the bytewise sort key comparison */
+    int result_compare;         /* not read by test_unicode_sorting() */
+    BOOL broken_on_old_win;     /* passed to broken(): a mismatch is then tolerated */
+};
+
+/* Table driving test_unicode_sorting().  Fields are given positionally in the
+ * order locale, flags, first, second, result_sortkey, result_compare,
+ * broken_on_old_win; omitted trailing fields are zero-initialized.  The number
+ * in the leading comment is the array index reported in failure messages. */
+static const struct sorting_test_entry unicode_sorting_tests[] =
+{
+    /*   0 */ { L"en-US", 0, L"\ue6e3\u0a02", L"\ue6e3\u20dc", CSTR_LESS_THAN, 0, TRUE }, /* Test default character, when there is main weight extra there must be no diacritic weight */
+    /*   1 */ { L"en-US", 0, L"\u276a", L"\u2768", CSTR_GREATER_THAN }, /* Test symbols, must add diacritic weight */
+    /*   2 */ { L"en-US", 0, L"\u204d", L"\uff02", CSTR_LESS_THAN }, /* Test symbols, must add case weight */
+    /*   3 */ { L"en-US", 0, L"a \u2060 b", L"a  b", CSTR_EQUAL }, /* Test unsortable characters */
+    /*   4 */ { L"en-US", 0, L"a \xfff0 b", L"a  b", CSTR_EQUAL }, /* Test invalid characters */
+    /*   5 */ { L"en-US", 0, L"\x00fc", L"\x016d", CSTR_LESS_THAN },
+    /*   6 */ { L"en-US", 0, L"\x3fcb\x7fd5", L"\x0006\x3032", CSTR_GREATER_THAN },
+    /*   7 */ { L"en-US", 0, L"\x00fc\x30fd", L"\x00fa\x1833", CSTR_LESS_THAN },
+    /*   8 */ { L"en-US", 0, L"\x0037", L"\x277c", CSTR_LESS_THAN, 0, TRUE }, /* Normal character */
+    /*   9 */ { L"en-US", 0, L"\x1eca", L"\x1ecb", CSTR_GREATER_THAN }, /* Normal character */
+    /*  10 */ { L"en-US", 0, L"\x1d05", L"\x1d48", CSTR_GREATER_THAN }, /* Normal character */
+    /*  11 */ { L"en-US", 0, L"\x19d7", L"\x096d", CSTR_GREATER_THAN }, /* Normal character diacritics */
+    /*  12 */ { L"en-US", 0, L"\x00f5", L"\x1ecf", CSTR_LESS_THAN }, /* Normal character diacritics */
+    /*  13 */ { L"en-US", 0, L"\x2793", L"\x0d70", CSTR_LESS_THAN, 0, TRUE }, /* Normal character diacritics */
+    /*  14 */ { L"en-US", 0, L"A", L"a", CSTR_GREATER_THAN }, /* Normal character case weights */
+    /*  15 */ { L"en-US", 0, L"z", L"Z", CSTR_LESS_THAN }, /* Normal character case weights */
+    /*  16 */ { L"en-US", 0, L"\xe5a6", L"\xe5a5\x0333", CSTR_GREATER_THAN, 0, TRUE }, /* CJK with extra value */
+    /*  17 */ { L"en-US", 0, L"\xe5d7", L"\xe5d6\x0330", CSTR_GREATER_THAN, 0, TRUE }, /* CJK with extra value */
+    /*  18 */ { L"en-US", 0, L"\x1B56\x0330", L"\x1096", CSTR_GREATER_THAN }, /* Diacritic is added */
+    /*  19 */ { L"en-US", 0, L"\x1817\x0333", L"\x19d7", CSTR_GREATER_THAN }, /* Diacritic is added */
+    /*  20 */ { L"en-US", 0, L"\x04de\x05ac", L"\x0499", CSTR_GREATER_THAN }, /* Diacritic is added */
+    /*  21 */ { L"en-US", 0, L"\x01ba\x0654", L"\x01b8", CSTR_LESS_THAN }, /* Diacritic can overflow */
+    /*  22 */ { L"en-US", 0, L"\x06b7\x06eb", L"\x06b6", CSTR_LESS_THAN }, /* Diacritic can overflow */
+    /*  23 */ { L"en-US", 0, L"\x1420\x0333", L"\x141f", CSTR_LESS_THAN }, /* Diacritic can overflow */
+    /*  24 */ { L"en-US", 0, L"\x11bc", L"\x110b", CSTR_GREATER_THAN }, /* Jamo case weight */
+    /*  25 */ { L"en-US", 0, L"\x11c1", L"\x1111", CSTR_GREATER_THAN }, /* Jamo case weight */
+    /*  26 */ { L"en-US", 0, L"\x11af", L"\x1105", CSTR_GREATER_THAN }, /* Jamo case weight */
+    /*  27 */ { L"en-US", 0, L"\x11c2", L"\x11f5", CSTR_LESS_THAN }, /* Jamo main weight */
+    /*  28 */ { L"en-US", 0, L"\x1108", L"\x1121", CSTR_LESS_THAN }, /* Jamo main weight */
+    /*  29 */ { L"en-US", 0, L"\x1116", L"\x11c7", CSTR_LESS_THAN }, /* Jamo main weight */
+    /*  30 */ { L"en-US", 0, L"\x11b1", L"\x11d1", CSTR_LESS_THAN }, /* Jamo main weight */
+    /*  31 */ { L"en-US", 0, L"\x4550\x73d2", L"\x3211\x23ad", CSTR_GREATER_THAN }, /* Script 5 main weight 1 */
+    /*  32 */ { L"en-US", 0, L"\x3265", L"\x4079", CSTR_LESS_THAN }, /* Script 5 main weight 1 */
+    /*  33 */ { L"en-US", 0, L"\x4c19\x68d0\x52d0", L"\x316d", CSTR_GREATER_THAN }, /* Script 5 main weight 1 */
+    /*  34 */ { L"en-US", 0, L"\x72dd", L"\x6b8a", CSTR_GREATER_THAN }, /* Script 5 main weight 2 */
+    /*  35 */ { L"en-US", 0, L"\x6785\x3bff\x6f83", L"\x7550\x34c9\x71a7", CSTR_LESS_THAN }, /* Script 5 main weight 2 */
+    /*  36 */ { L"en-US", 0, L"\x5d61", L"\x3aef", CSTR_LESS_THAN }, /* Script 5 main weight 2 */
+    /*  37 */ { L"en-US", 0, L"\x207a", L"\xfe62", CSTR_GREATER_THAN }, /* Symbols case weights */
+    /*  38 */ { L"en-US", 0, L"\xfe65", L"\xff1e", CSTR_GREATER_THAN }, /* Symbols case weights */
+    /*  39 */ { L"en-US", 0, L"\x2502", L"\xffe8", CSTR_GREATER_THAN }, /* Symbols case weights */
+    /*  40 */ { L"en-US", 0, L"\x21da", L"\x21dc", CSTR_LESS_THAN }, /* Symbols diacritic weights */
+    /*  41 */ { L"en-US", 0, L"\x29fb", L"\x2295", CSTR_LESS_THAN }, /* Symbols diacritic weights */
+    /*  42 */ { L"en-US", 0, L"\x0092", L"\x009c", CSTR_LESS_THAN }, /* Symbols diacritic weights */
+    /*  43 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x21da", L"\x21dc", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
+    /*  44 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x29fb", L"\x2295", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
+    /*  45 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x0092", L"\x009c", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
+    /*  46 */ { L"en-US", 0, L"\x3099", L"\x309a", CSTR_EQUAL }, /* MIN_WEIGHT */
+    /*  47 */ { L"en-US", 0, L"\x309b", L"\x05a2", CSTR_EQUAL }, /* MIN_WEIGHT */
+    /*  48 */ { L"en-US", 0, L"\xff9e", L"\x0e47", CSTR_EQUAL }, /* MIN_WEIGHT */
+};
+
+/* For every entry of unicode_sorting_tests, generate the sort keys of both
+ * strings with LCMapStringEx(LCMAP_SORTKEY) and compare them byte by byte.
+ * The outcome is checked against entry->result_sortkey, unless the entry is
+ * marked broken_on_old_win.  Skipped when LCMapStringEx is unavailable. */
+static void test_unicode_sorting(void)
+{
+    int i;
+    if (!pLCMapStringEx)
+    {
+        win_skip("LCMapStringEx not available\n");
+        return;
+    }
+    for (i = 0; i < ARRAY_SIZE(unicode_sorting_tests); i++)
+    {
+        int pos;
+        BYTE buff1[1000];
+        BYTE buff2[1000];
+        int len1, len2;
+        int result = CSTR_EQUAL;
+        const struct sorting_test_entry* entry = &unicode_sorting_tests[i];
+
+        /* For LCMAP_SORTKEY the destination is a byte buffer, hence the cast */
+        len1 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->first, -1, (WCHAR*)buff1, ARRAY_SIZE(buff1), NULL, NULL, 0);
+        /* A failed call returns 0; without this check the loop below would not
+         * run and the entry would silently count as CSTR_EQUAL */
+        ok (len1 > 0, "Test %d - Failed to generate first sortkey, error %u\n", i, GetLastError());
+        len2 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->second, -1, (WCHAR*)buff2, ARRAY_SIZE(buff2), NULL, NULL, 0);
+        ok (len2 > 0, "Test %d - Failed to generate second sortkey, error %u\n", i, GetLastError());
+
+        /* The first differing byte decides the order */
+        for (pos = 0; pos < len1 && pos < len2; pos++)
+        {
+            if (buff1[pos] > buff2[pos])
+            {
+                result = CSTR_GREATER_THAN;
+                break;
+            }
+            else if (buff1[pos] < buff2[pos])
+            {
+                result = CSTR_LESS_THAN;
+                break;
+            }
+        }
+
+        ok (result == entry->result_sortkey || broken(entry->broken_on_old_win), "Test %d - Expected %d, got %d\n", i, entry->result_sortkey, result);
+    }
+}
+
 static void test_FoldStringA(void)
 {
   int ret, i, j;
@@ -6897,4 +7006,5 @@ START_TEST(locale)
   test_NLSVersion();
   /* this requires collation table patch to make it MS compatible */
   if (0) test_sorting();
+  test_unicode_sorting();
 }
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index 53e4e42da3..74177371d9 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2126,127 +2126,6 @@ static int wcstombs_codepage( UINT codepage, DWORD flags, const WCHAR *src, int
         return wcstombs_sbcs( info, src, srclen, dst, dstlen );
 }
-
-static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen )
-{
-    WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
-    int key_len[4];
-    char *key_ptr[4];
-    const WCHAR *src_save = src;
-    int srclen_save = srclen;
-
-    key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
-    for (; srclen; srclen--, src++)
-    {
-        unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
-        dummy[0] = *src;
-        if (decomposed_len)
-        {
-            for (i = 0; i < decomposed_len; i++)
-            {
-                WCHAR wch = dummy[i];
-                unsigned int ce;
-
-                if ((flags & NORM_IGNORESYMBOLS) &&
-                    (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
-                    continue;
-
-                if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
-
-                ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
-                if (ce != (unsigned int)-1)
-                {
-                    if (ce >> 16) key_len[0] += 2;
-                    if ((ce >> 8) & 0xff) key_len[1]++;
-                    if ((ce >> 4) & 0x0f) key_len[2]++;
-                    if (ce & 1)
-                    {
-                        if (wch >> 8) key_len[3]++;
-                        key_len[3]++;
-                    }
-                }
-                else
-                {
-                    key_len[0] += 2;
-                    if (wch >> 8) key_len[0]++;
-                    if (wch & 0xff) key_len[0]++;
-		}
-            }
-        }
-    }
-
-    if (!dstlen) /* compute length */
-        /* 4 * '\1' + key length */
-        return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;
-
-    if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
-        return 0; /* overflow */
-
-    src = src_save;
-    srclen = srclen_save;
-
-    key_ptr[0] = dst;
-    key_ptr[1] = key_ptr[0] + key_len[0] + 1;
-    key_ptr[2] = key_ptr[1] + key_len[1] + 1;
-    key_ptr[3] = key_ptr[2] + key_len[2] + 1;
-
-    for (; srclen; srclen--, src++)
-    {
-        unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
-        dummy[0] = *src;
-        if (decomposed_len)
-        {
-            for (i = 0; i < decomposed_len; i++)
-            {
-                WCHAR wch = dummy[i];
-                unsigned int ce;
-
-                if ((flags & NORM_IGNORESYMBOLS) &&
-                    (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
-                    continue;
-
-                if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
-
-                ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
-                if (ce != (unsigned int)-1)
-                {
-                    WCHAR key;
-                    if ((key = ce >> 16))
-                    {
-                        *key_ptr[0]++ = key >> 8;
-                        *key_ptr[0]++ = key & 0xff;
-                    }
-                    /* make key 1 start from 2 */
-                    if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
-                    /* make key 2 start from 2 */
-                    if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
-                    /* key 3 is always a character code */
-                    if (ce & 1)
-                    {
-                        if (wch >> 8) *key_ptr[3]++ = wch >> 8;
-                        if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
-                    }
-                }
-                else
-                {
-                    *key_ptr[0]++ = 0xff;
-                    *key_ptr[0]++ = 0xfe;
-                    if (wch >> 8) *key_ptr[0]++ = wch >> 8;
-                    if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
-                }
-            }
-        }
-    }
-
-    *key_ptr[0] = 1;
-    *key_ptr[1] = 1;
-    *key_ptr[2] = 1;
-    *key_ptr[3]++ = 1;
-    *key_ptr[3] = 0;
-    return key_ptr[3] - dst;
-}
-
-
 /* compose a full-width katakana. return consumed source characters. */
 static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst )
 {
@@ -2574,6 +2453,358 @@ static int compare_weights(int flags, const WCHAR *str1, int len1,
     return len1 - len2;
 }
+/* Start sortkey handler code. */
+
+/* Defines */
+
+#define JAPANESE 3
+#define MIN_WEIGHT 2
+#define LIST_STACK_BUFFER 1000
+
+/* Internal structures */
+
+/* Weights of a single character, unpacked by get_char() from the 32-bit
+ * sort.keys[] entry of that character. */
+typedef struct _character_info
+{
+    BYTE weight_primary;    /* bits 0-7 of the table entry */
+    BYTE script_member;     /* bits 8-15; get_char() returns FALSE when this is 0 */
+    BYTE weight_diacritic;  /* bits 16-23 */
+    BYTE weight_case;       /* bits 24-31 */
+} character_info;
+
+/* One main weight entry; main_weights_add() appends its fields to the main
+ * weight list in declaration order. */
+typedef struct _weight_main_info
+{
+    BYTE script_member;     /* first byte written */
+    BYTE weight_primary;    /* second byte written */
+    BYTE extra;             /* optional third byte, only written when non-zero */
+} weight_main_info;
+
+/* Growable array of fixed-size elements.  The first buffer_count elements are
+ * kept in the inline buffer; further elements go to the heap block 'extra'. */
+typedef struct _list
+{
+    int extra_len;          /* size of 'extra' in bytes, 0 until the first heap allocation */
+    int len;                /* number of elements currently stored */
+    BYTE buffer[LIST_STACK_BUFFER]; /* inline storage */
+    int buffer_count;       /* LIST_STACK_BUFFER / element_size, set by LIST_INIT */
+    BYTE* extra;            /* heap overflow storage, released by LIST_DESTROY */
+    int element_size;       /* size of one element in bytes */
+} list;
+
+/* Working state for generating one sort key. */
+typedef struct _sortkey_data
+{
+    int flags;              /* NORM_* flags consulted while collecting and writing weights */
+    list key;               /* finished sort key bytes, filled by sortkey_write_result() */
+    list weights_main;      /* main weight bytes collected per character */
+    list weights_diacritic; /* diacritic weight bytes collected per character */
+    list weights_case;      /* case weight bytes collected per character */
+} sortkey_data;
+
+/* List functions */
+
+/* Prepare an empty list whose elements are type_size bytes each.  The inline
+ * buffer contents are left untouched; no heap block is allocated yet. */
+static void LIST_INIT(list* name, int type_size)
+{
+    name->element_size = type_size;
+    name->buffer_count = LIST_STACK_BUFFER / type_size;
+    name->len = 0;
+    name->extra = NULL;
+    name->extra_len = 0;
+}
+
+/* Release the heap overflow block of a list (if any) and forget it, so that
+ * the list does not keep a dangling pointer and a repeated destroy cannot
+ * free the same block twice. */
+static void LIST_DESTROY(list* name)
+{
+    RtlFreeHeap(GetProcessHeap(), 0, name->extra);
+    name->extra = NULL;
+    name->extra_len = 0;
+}
+
+/* Return a pointer to the storage of element 'index'.
+ * Elements 0 .. buffer_count-1 live in the inline buffer, all later ones in
+ * the heap block, whose first element is number buffer_count.  The heap
+ * offset therefore has to be computed in elements before scaling to bytes;
+ * subtracting buffer_count from a byte offset is only right for 1-byte
+ * elements.  The caller must make sure the slot exists. */
+static void* LIST_GET(list* name, int index)
+{
+    if (index < name->buffer_count)
+        return &name->buffer[index * name->element_size];
+    else
+        return &name->extra[(index - name->buffer_count) * name->element_size];
+}
+
+/* Add entry to list, resizing as needed.
+ * 'value' points to element_size bytes that are copied into the list.
+ * If the heap block cannot be allocated or grown, the list is left unchanged
+ * (the element is dropped) instead of losing the old block and writing
+ * through a NULL pointer. */
+static void LIST_ADD(list* name, const void *value)
+{
+    /* Bytes of the inline buffer that are actually usable for whole elements */
+    int inline_bytes = name->buffer_count * name->element_size;
+
+    if ((name->len + 1) * name->element_size > name->extra_len + inline_bytes)
+    {
+        int new_len;
+        BYTE* new_extra;
+
+        if (!name->extra) /* First allocation */
+        {
+            new_len = LIST_STACK_BUFFER;
+            new_extra = RtlAllocateHeap(GetProcessHeap(), 0, new_len);
+        }
+        else
+        {
+            new_len = name->extra_len * 2;
+            new_extra = RtlReAllocateHeap(GetProcessHeap(), 0, name->extra, new_len);
+        }
+        /* On failure the previous block (if any) is still valid and owned by the list */
+        if (!new_extra) return;
+        name->extra = new_extra;
+        name->extra_len = new_len;
+    }
+    memcpy(LIST_GET(name, name->len), value, name->element_size);
+    name->len++;
+}
+
+/* Append a weight list to the sortkey.
+ *   data                 pointer to the sortkey_data being built
+ *   weights              name of the list member of *data to append
+ *   type                 element type of that list
+ *   statement_get_value  expression yielding a pointer to the bytes to add;
+ *                        may refer to 'element', the current list entry
+ *   statement_is_ignored expression on 'element'; trailing entries for which
+ *                        it is true are not appended
+ * The expansion deliberately has no trailing semicolon, so that the macro
+ * behaves like a single statement (e.g. in an unbraced if/else). */
+#define APPEND_LIST_TO_SORTKEY(data, weights, type, statement_get_value, statement_is_ignored) \
+    do {                                                                \
+        int z;                                                          \
+        int end = (data)->weights.len - 1;                              \
+        while (end >= 0)                                                \
+        {                                                               \
+            const type* element = LIST_GET(&(data)->weights, end);      \
+            (void)element;                                              \
+            if (!(statement_is_ignored)) break;                         \
+            end--;                                                      \
+        }                                                               \
+        for (z = 0; z <= end; z++)                                      \
+        {                                                               \
+            const type* element = LIST_GET(&(data)->weights, z);        \
+            LIST_ADD(&(data)->key, (statement_get_value));              \
+        }                                                               \
+    } while (0)
+
+/* Helper functions */
+
+/* Look up 'ch' in sort.keys and unpack the 32-bit entry into *info.
+ * Returns FALSE when the script member byte is zero, TRUE otherwise.
+ * 'data' is not used. */
+static BOOL get_char(sortkey_data* data, character_info* info, WCHAR ch)
+{
+    const DWORD packed = sort.keys[ch];
+
+    info->weight_primary   = packed & 0xff;
+    info->script_member    = (packed >> 8) & 0xff;
+    info->weight_diacritic = (packed >> 16) & 0xff;
+    info->weight_case      = packed >> 24;
+
+    return info->script_member ? TRUE : FALSE;
+}
+
+/* Store the flags and set up the four byte lists of 'data' as empty.
+ * 'locale' and 'is_compare_string' are currently not used. */
+static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale, BOOL is_compare_string)
+{
+    const int byte_size = sizeof(BYTE);
+
+    LIST_INIT(&data->weights_case, byte_size);
+    LIST_INIT(&data->weights_diacritic, byte_size);
+    LIST_INIT(&data->weights_main, byte_size);
+    LIST_INIT(&data->key, byte_size);
+    data->flags = flags;
+}
+
+/* Release the heap storage held by the four lists of 'data'. */
+static void sortkey_data_destroy(sortkey_data* data)
+{
+    LIST_DESTROY(&data->weights_case);
+    LIST_DESTROY(&data->weights_diacritic);
+    LIST_DESTROY(&data->weights_main);
+    LIST_DESTROY(&data->key);
+}
+
+/* Build a main weight entry from its two mandatory bytes; 'extra' is 0. */
+static weight_main_info create_weight_main(BYTE script_member, BYTE weight_primary)
+{
+    weight_main_info weight;
+
+    weight.script_member = script_member;
+    weight.weight_primary = weight_primary;
+    weight.extra = 0;
+    return weight;
+}
+
+/* Append a case weight, after clearing the bits selected by the flags:
+ * 0x18 for NORM_IGNORECASE, 0x01 for NORM_IGNOREWIDTH and 0x20 for
+ * NORM_IGNOREKANATYPE. */
+static void case_weights_add(sortkey_data* data, BYTE value)
+{
+    BYTE cleared = 0;
+
+    if (data->flags & NORM_IGNORECASE)
+        cleared |= 0x18;
+    if (data->flags & NORM_IGNOREWIDTH)
+        cleared |= 0x01;
+    if (data->flags & NORM_IGNOREKANATYPE)
+        cleared |= 0x20;
+
+    value &= ~cleared;
+    LIST_ADD(&data->weights_case, &value);
+}
+
+/* Append a main weight entry: script member, primary weight and, only when
+ * it is non-zero, the extra byte. */
+static void main_weights_add(sortkey_data *data, weight_main_info* value)
+{
+    list* main_list = &data->weights_main;
+
+    LIST_ADD(main_list, &value->script_member);
+    LIST_ADD(main_list, &value->weight_primary);
+    if (value->extra != 0)
+        LIST_ADD(main_list, &value->extra);
+}
+
+/* Append one byte to the diacritic weight list.  'info' is not used. */
+static void diacritic_weights_add(sortkey_data* data, const character_info* info, BYTE value)
+{
+    const BYTE weight = value;
+
+    (void)info;
+    LIST_ADD(&data->weights_diacritic, &weight);
+}
+
+/* Main sortkey logic */
+
+/* Collect the weights of a character that has no special handling.
+ * Does nothing when get_char() rejects the character.  For script members
+ * 0xa9..0xaf the diacritic byte is stored as the extra byte of the main
+ * weight and no diacritic weight is added; otherwise it is added to the
+ * diacritic list.  Main and case weights are always added. */
+static void sortkey_handle_default_character(sortkey_data* data, WCHAR c)
+{
+    character_info info;
+    weight_main_info weightmain;
+    BOOL has_extra;
+
+    if (!get_char(data, &info, c))
+        return;
+
+    /* Some CJK have extra value */
+    has_extra = info.script_member >= 0xa9 && info.script_member <= 0xaf;
+
+    weightmain = create_weight_main(info.script_member, info.weight_primary);
+    if (has_extra)
+        weightmain.extra = info.weight_diacritic;
+    if (!has_extra)
+        diacritic_weights_add(data, &info, info.weight_diacritic);
+
+    main_weights_add(data, &weightmain);
+    case_weights_add(data, info.weight_case);
+}
+
+/* Collect the weights of one character into the weight lists of 'data',
+ * dispatching on the script member byte of its table entry.
+ * Returns FALSE when get_char() reports a zero script member (nothing is
+ * added), TRUE otherwise.  'str' and 'i' are currently not used. */
+static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* str, int i)
+{
+    weight_main_info weightmain;
+    character_info info;
+    int flags = data->flags;
+
+    if (!get_char(data, &info, c))
+    {
+        return FALSE;
+    }
+
+    switch (info.script_member)
+    {
+    /* Not reached: get_char() already returned FALSE for script member 0 */
+    case 0: /* Not sorted */
+        break;
+
+    /* The diacritic weight is folded into the most recent diacritic entry if
+     * there is one, otherwise it becomes a new entry */
+    case 1:
+        if (data->weights_diacritic.len > 0)
+        {
+            BYTE* entry = LIST_GET(&data->weights_diacritic, data->weights_diacritic.len - 1);
+            *entry += info.weight_diacritic; /* Overflow can happen, that's okay */
+        }
+        else
+            diacritic_weights_add(data, &info, info.weight_diacritic);
+        break;
+
+    case JAPANESE:
+        /* TODO */
+        break;
+
+    /* Main weight is built from (weight_primary, weight_diacritic); the
+     * diacritic weight itself is the neutral MIN_WEIGHT */
+    case 4: /* Jamo */
+        weightmain = create_weight_main(info.weight_primary, info.weight_diacritic);
+        main_weights_add(data, &weightmain);
+
+        diacritic_weights_add(data, &info, MIN_WEIGHT);
+
+        case_weights_add(data, info.weight_case);
+        break;
+
+    /* Emits the fixed main weight (253, 255) followed by
+     * (weight_primary, weight_diacritic); diacritic and case are MIN_WEIGHT */
+    case 5:
+        weightmain = create_weight_main(253, 255);
+        main_weights_add(data, &weightmain);
+
+        weightmain = create_weight_main(info.weight_primary, info.weight_diacritic);
+        main_weights_add(data, &weightmain);
+
+        diacritic_weights_add(data, &info, MIN_WEIGHT);
+
+        case_weights_add(data, MIN_WEIGHT);
+        break;
+
+    case 6: /* Punctuation */
+        /* TODO */
+        break;
+
+    /* Symbols contribute nothing at all under NORM_IGNORESYMBOLS */
+    case 7:  /* Symbols */
+    case 8:  /* Symbols */
+    case 9:  /* Symbols */
+    case 10: /* Symbols */
+    case 11: /* Symbols */
+    case 12: /* Symbols */
+        if (flags & NORM_IGNORESYMBOLS)
+            break;
+
+        weightmain = create_weight_main(info.script_member, info.weight_primary);
+        main_weights_add(data, &weightmain);
+
+        diacritic_weights_add(data, &info, info.weight_diacritic);
+
+        case_weights_add(data, info.weight_case);
+        break;
+
+    /* Everything else goes through the generic handler, which looks the
+     * character up again */
+    default:
+        sortkey_handle_default_character(data, c);
+        break;
+    }
+    return TRUE;
+}
+
+/* Assemble the final sort key in data->key from the collected weight lists.
+ * Layout: main weights, 1, diacritic weights, 1, case weights, 1, (extra
+ * weights, not implemented), 1, (special weights, not implemented), 0.
+ * Diacritic weights are left out with NORM_IGNORENONSPACE, and their trailing
+ * entries <= MIN_WEIGHT are always dropped.  Case weights are left out only
+ * when both NORM_IGNORECASE and NORM_IGNOREWIDTH are set. */
+static void sortkey_write_result(sortkey_data* data)
+{
+    const BYTE separator = 1;
+    const BYTE terminator = 0;
+    const int flags = data->flags;
+    const BOOL skip_diacritics = (flags & NORM_IGNORENONSPACE) != 0;
+    const BOOL skip_case = (flags & NORM_IGNORECASE) && (flags & NORM_IGNOREWIDTH);
+
+    /* Main weights */
+    APPEND_LIST_TO_SORTKEY(data, weights_main, BYTE, element, FALSE);
+    LIST_ADD(&data->key, &separator);
+
+    /* Diacritic weights */
+    if (!skip_diacritics)
+    {
+        APPEND_LIST_TO_SORTKEY(data, weights_diacritic, BYTE, element, *element <= MIN_WEIGHT);
+    }
+    LIST_ADD(&data->key, &separator);
+
+    /* Case weights */
+    if (!skip_case)
+    {
+        APPEND_LIST_TO_SORTKEY(data, weights_case, BYTE, element, FALSE);
+    }
+    LIST_ADD(&data->key, &separator);
+
+    /* Extra weights */
+    /* TODO */
+    LIST_ADD(&data->key, &separator);
+
+    /* Special weights */
+    /* TODO */
+    LIST_ADD(&data->key, &terminator);
+}
+
+/* Generate the sort key of a string.
+ *   flags       NORM_* flags controlling which weights are produced
+ *   locale      currently not used by the generator
+ *   str         source string
+ *   str_len     length of str in WCHARs; any negative value means that str
+ *               is NUL-terminated
+ *   buffer      destination for the key bytes, may be NULL for a size query
+ *   buffer_len  size of buffer in bytes, 0 for a size query
+ * Returns the key length in bytes (including the terminating 0) when the key
+ * was written or when only the size was queried, and 0 when the supplied
+ * buffer is too small.  Nothing is written through 'buffer' for a size
+ * query, so a NULL buffer is never dereferenced. */
+static int sortkey_generate(int flags, const WCHAR* locale, const WCHAR* str, int str_len, BYTE* buffer, int buffer_len)
+{
+    int i;
+    sortkey_data data;
+    int ret = 0;
+
+    sortkey_data_init(&data, flags, locale, FALSE);
+
+    if (str_len < 0)
+        str_len = wcslen(str);
+
+    for (i = 0; i < str_len; i++)
+    {
+        sortkey_handle_character(&data, str[i], str, i);
+    }
+
+    sortkey_write_result(&data);
+
+    if (!buffer || buffer_len <= 0)
+    {
+        /* Size query: report the required length without touching buffer */
+        ret = data.key.len;
+    }
+    else if (data.key.len <= buffer_len)
+    {
+        /* Copy element by element, the key may be split between the inline
+         * and the heap part of the list */
+        for (i = 0; i < data.key.len; i++)
+        {
+            BYTE* value = LIST_GET(&data.key, i);
+            buffer[i] = *value;
+        }
+        ret = data.key.len;
+    }
+    sortkey_data_destroy(&data);
+    return ret;
+}
+
+/* End sortkey handler code */
static const struct geoinfo *get_geoinfo_ptr( GEOID geoid )
 {
@@ -4964,8 +5195,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
         TRACE( "(%s,0x%08x,%s,%d,%p,%d)\n",
                debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
-        if ((ret = get_sortkey( flags, src, srclen, (char *)dst, dstlen ))) ret++;
-        else SetLastError( ERROR_INSUFFICIENT_BUFFER );
+        if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))
+            SetLastError( ERROR_INSUFFICIENT_BUFFER );
         return ret;
     }
--
2.26.2

    

[PATCH 1/5] kernelbase/locale: Implement sortkey generation on official tables

Signed-off-by: Fabian Maurer <dark.shadow4@web.de>