Re: [PATCH 1/5] kernelbase/locale: Implement sortkey generation on official tables

30 Apr 2020

On 4/28/20 1:17 PM, Fabian Maurer wrote:
...
Signed-off-by: Fabian Maurer <dark.shadow4@web.de>
dlls/kernel32/tests/locale.c | 110 ++++++++
  dlls/kernelbase/locale.c     | 477 ++++++++++++++++++++++++++---------
  2 files changed, 464 insertions(+), 123 deletions(-)
So as far as I understand, the sort key algorithm writes the level 0 
weights (script and alphabetic weight) for the whole string to the sort 
key, then the level 1 weights (diacritic), and so on, right?
In that case, what seems potentially simpler to me is to calculate those 
weights one level at a time, rather than one character at a time. That 
is, you'd end up doing something like
static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
     int i, used = 0;

     /* level 0: main weights for the whole string */
     for (i = 0; i < srclen; ++i)
         used += get_main_weights(src[i], dst + used, dstlen - used);

     /* level 1: diacritic weights for the whole string */
     if (!(flags & NORM_IGNORENONSPACE))
     {
         for (i = 0; i < srclen; ++i)
             used += get_diacritic_weights(src[i], dst + used, dstlen - used);
     }
     ...
     return used;
}
This avoids the need to store temporary buffers.
As that example shows, I also think it's probably simpler to just pass 
the buffer directly to whatever functions are writing sortkey bytes into it.
...

diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 4c1e1b4d73..13839bb10a 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -2681,6 +2681,13 @@ static void test_lcmapstring_unicode(lcmapstring_wrapper func_ptr, const char *f
      lstrlenW(symbols_stripped) + 1, ret);
      ok(!lstrcmpW(buf, symbols_stripped), "%s string comparison mismatch\n", func_name);

/* test small buffer */
lstrcpyW(buf, fooW);
ret = func_ptr(LCMAP_SORTKEY, lower_case, -1, buf, 2);
ok(ret == 0, "Expected a failure\n");
ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER,
      "%s unexpected error code %d\n", func_name, GetLastError());;


/* test srclen = 0 */
SetLastError(0xdeadbeef);
ret = func_ptr(0, upper_case, 0, buf, ARRAY_SIZE(buf));



@@ -3108,6 +3115,108 @@ static void test_sorting(void)
      }
  }
+struct sorting_test_entry {

/* One sort-key comparison case consumed by test_unicode_sorting(). */
const WCHAR* locale; /* locale name passed to LCMapStringEx() */
DWORD flags; /* extra mapping flags, ORed with LCMAP_SORTKEY */
const WCHAR* first; /* first string to map */
const WCHAR* second; /* second string to map */
int result_sortkey; /* expected CSTR_* outcome of comparing first's key bytes with second's */
int result_compare; /* not read by test_unicode_sorting() as shown in this patch */
BOOL broken_on_old_win; /* handed to broken() in the ok() check; presumably marks old-Windows mismatches */

+};



+static const struct sorting_test_entry unicode_sorting_tests[] =
+{

/* Row layout: locale, flags, first, second, result_sortkey, result_compare, broken_on_old_win.
 * Rows that stop after result_sortkey leave the remaining members zero-initialized. */
/*   0 */ { L"en-US", 0, L"\ue6e3\u0a02", L"\ue6e3\u20dc", CSTR_LESS_THAN, 0, TRUE }, /* Test default character, when there is main weight extra there must be no diacritic weight */
/*   1 */ { L"en-US", 0, L"\u276a", L"\u2768", CSTR_GREATER_THAN }, /* Test symbols, must add diacritic weight */
/*   2 */ { L"en-US", 0, L"\u204d", L"\uff02", CSTR_LESS_THAN }, /* Test symbols, must add case weight */
/*   3 */ { L"en-US", 0, L"a \u2060 b", L"a  b", CSTR_EQUAL }, /* Test unsortable characters */
/*   4 */ { L"en-US", 0, L"a \xfff0 b", L"a  b", CSTR_EQUAL }, /* Test invalid characters */
/*   5 */ { L"en-US", 0, L"\x00fc", L"\x016d", CSTR_LESS_THAN },
/*   6 */ { L"en-US", 0, L"\x3fcb\x7fd5", L"\x0006\x3032", CSTR_GREATER_THAN },
/*   7 */ { L"en-US", 0, L"\x00fc\x30fd", L"\x00fa\x1833", CSTR_LESS_THAN },
/*   8 */ { L"en-US", 0, L"\x0037", L"\x277c", CSTR_LESS_THAN, 0, TRUE }, /* Normal character */
/*   9 */ { L"en-US", 0, L"\x1eca", L"\x1ecb", CSTR_GREATER_THAN }, /* Normal character */
/*  10 */ { L"en-US", 0, L"\x1d05", L"\x1d48", CSTR_GREATER_THAN }, /* Normal character */
/*  11 */ { L"en-US", 0, L"\x19d7", L"\x096d", CSTR_GREATER_THAN }, /* Normal character diacritics */
/*  12 */ { L"en-US", 0, L"\x00f5", L"\x1ecf", CSTR_LESS_THAN }, /* Normal character diacritics */
/*  13 */ { L"en-US", 0, L"\x2793", L"\x0d70", CSTR_LESS_THAN, 0, TRUE }, /* Normal character diacritics */
/*  14 */ { L"en-US", 0, L"A", L"a", CSTR_GREATER_THAN }, /* Normal character case weights */
/*  15 */ { L"en-US", 0, L"z", L"Z", CSTR_LESS_THAN }, /* Normal character case weights */
/*  16 */ { L"en-US", 0, L"\xe5a6", L"\xe5a5\x0333", CSTR_GREATER_THAN, 0, TRUE }, /* CJK with extra value */
/*  17 */ { L"en-US", 0, L"\xe5d7", L"\xe5d6\x0330", CSTR_GREATER_THAN, 0, TRUE }, /* CJK with extra value */
/*  18 */ { L"en-US", 0, L"\x1B56\x0330", L"\x1096", CSTR_GREATER_THAN }, /* Diacritic is added */
/*  19 */ { L"en-US", 0, L"\x1817\x0333", L"\x19d7", CSTR_GREATER_THAN }, /* Diacritic is added */
/*  20 */ { L"en-US", 0, L"\x04de\x05ac", L"\x0499", CSTR_GREATER_THAN }, /* Diacritic is added */
/*  21 */ { L"en-US", 0, L"\x01ba\x0654", L"\x01b8", CSTR_LESS_THAN }, /* Diacritic can overflow */
/*  22 */ { L"en-US", 0, L"\x06b7\x06eb", L"\x06b6", CSTR_LESS_THAN }, /* Diacritic can overflow */
/*  23 */ { L"en-US", 0, L"\x1420\x0333", L"\x141f", CSTR_LESS_THAN }, /* Diacritic can overflow */
/*  24 */ { L"en-US", 0, L"\x11bc", L"\x110b", CSTR_GREATER_THAN }, /* Jamo case weight */
/*  25 */ { L"en-US", 0, L"\x11c1", L"\x1111", CSTR_GREATER_THAN }, /* Jamo case weight */
/*  26 */ { L"en-US", 0, L"\x11af", L"\x1105", CSTR_GREATER_THAN }, /* Jamo case weight */
/*  27 */ { L"en-US", 0, L"\x11c2", L"\x11f5", CSTR_LESS_THAN }, /* Jamo main weight */
/*  28 */ { L"en-US", 0, L"\x1108", L"\x1121", CSTR_LESS_THAN }, /* Jamo main weight */
/*  29 */ { L"en-US", 0, L"\x1116", L"\x11c7", CSTR_LESS_THAN }, /* Jamo main weight */
/*  30 */ { L"en-US", 0, L"\x11b1", L"\x11d1", CSTR_LESS_THAN }, /* Jamo main weight */
/*  31 */ { L"en-US", 0, L"\x4550\x73d2", L"\x3211\x23ad", CSTR_GREATER_THAN }, /* Script 5 main weight 1 */
/*  32 */ { L"en-US", 0, L"\x3265", L"\x4079", CSTR_LESS_THAN }, /* Script 5 main weight 1 */
/*  33 */ { L"en-US", 0, L"\x4c19\x68d0\x52d0", L"\x316d", CSTR_GREATER_THAN }, /* Script 5 main weight 1 */
/*  34 */ { L"en-US", 0, L"\x72dd", L"\x6b8a", CSTR_GREATER_THAN }, /* Script 5 main weight 2 */
/*  35 */ { L"en-US", 0, L"\x6785\x3bff\x6f83", L"\x7550\x34c9\x71a7", CSTR_LESS_THAN }, /* Script 5 main weight 2 */
/*  36 */ { L"en-US", 0, L"\x5d61", L"\x3aef", CSTR_LESS_THAN }, /* Script 5 main weight 2 */
/*  37 */ { L"en-US", 0, L"\x207a", L"\xfe62", CSTR_GREATER_THAN }, /* Symbols case weights */
/*  38 */ { L"en-US", 0, L"\xfe65", L"\xff1e", CSTR_GREATER_THAN }, /* Symbols case weights */
/*  39 */ { L"en-US", 0, L"\x2502", L"\xffe8", CSTR_GREATER_THAN }, /* Symbols case weights */
/*  40 */ { L"en-US", 0, L"\x21da", L"\x21dc", CSTR_LESS_THAN }, /* Symbols diacritic weights */
/*  41 */ { L"en-US", 0, L"\x29fb", L"\x2295", CSTR_LESS_THAN }, /* Symbols diacritic weights */
/*  42 */ { L"en-US", 0, L"\x0092", L"\x009c", CSTR_LESS_THAN }, /* Symbols diacritic weights */
/*  43 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x21da", L"\x21dc", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
/*  44 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x29fb", L"\x2295", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
/*  45 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x0092", L"\x009c", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
/*  46 */ { L"en-US", 0, L"\x3099", L"\x309a", CSTR_EQUAL }, /* MIN_WEIGHT */
/*  47 */ { L"en-US", 0, L"\x309b", L"\x05a2", CSTR_EQUAL }, /* MIN_WEIGHT */
/*  48 */ { L"en-US", 0, L"\xff9e", L"\x0e47", CSTR_EQUAL }, /* MIN_WEIGHT */

+};



+static void test_unicode_sorting(void)
+{

int i;
if (!pLCMapStringEx)
{

   win_skip("LCMapStringEx not available\n");


   return;


}
for (i = 0; i < ARRAY_SIZE(unicode_sorting_tests); i++)
{
   int pos;


   BYTE buff1[1000];


   BYTE buff2[1000];


   int len1, len2;


   int result = CSTR_EQUAL;


   const struct sorting_test_entry* entry = &unicode_sorting_tests[i];



   len1 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->first, -1, (WCHAR*)buff1, ARRAY_SIZE(buff1), NULL, NULL, 0);


   len2 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->second, -1, (WCHAR*)buff2, ARRAY_SIZE(buff2), NULL, NULL, 0);



Is there a reason to use LCMapStringEx() here rather than LCMapString()?
...


   for (pos = 0; pos < len1 && pos < len2; pos++)


   {


       if (buff1[pos] > buff2[pos])


       {


           result = CSTR_GREATER_THAN;


           break;


       }


       else if (buff1[pos] < buff2[pos])


       {


           result = CSTR_LESS_THAN;


           break;


       }


   }



   ok (result == entry->result_sortkey || broken(entry->broken_on_old_win), "Test %d - Expected %d, got %d\n", i, entry->result_sortkey, result);


}

+}

static void test_FoldStringA(void)
{
  int ret, i, j;

@@ -6897,4 +7006,5 @@ START_TEST(locale)
    test_NLSVersion();
    /* this requires collation table patch to make it MS compatible */
    if (0) test_sorting();
The fact that this test is commented out never struck me as great. I'm 
pretty sure that with todo_wine added as appropriate, it could pass. A 
first patch in this series could be to do that.
...

test_unicode_sorting();
}

diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index 53e4e42da3..74177371d9 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2126,127 +2126,6 @@ static int wcstombs_codepage( UINT codepage, DWORD flags, const WCHAR *src, int
          return wcstombs_sbcs( info, src, srclen, dst, dstlen );
  }



-static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen )
-{

WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
int key_len[4];
char *key_ptr[4];
const WCHAR *src_save = src;
int srclen_save = srclen;

key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
for (; srclen; srclen--, src++)
{
   unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/


   dummy[0] = *src;


   if (decomposed_len)


   {


       for (i = 0; i < decomposed_len; i++)


       {


           WCHAR wch = dummy[i];


           unsigned int ce;



           if ((flags & NORM_IGNORESYMBOLS) &&


               (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))


               continue;



           if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );



           ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];


           if (ce != (unsigned int)-1)


           {


               if (ce >> 16) key_len[0] += 2;


               if ((ce >> 8) & 0xff) key_len[1]++;


               if ((ce >> 4) & 0x0f) key_len[2]++;


               if (ce & 1)


               {


                   if (wch >> 8) key_len[3]++;


                   key_len[3]++;


               }


           }


           else


           {


               key_len[0] += 2;


               if (wch >> 8) key_len[0]++;


               if (wch & 0xff) key_len[0]++;


}


       }


   }


}

if (!dstlen) /* compute length */
   /* 4 * '\1' + key length */


   return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;



if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
   return 0; /* overflow */



src = src_save;
srclen = srclen_save;

key_ptr[0] = dst;
key_ptr[1] = key_ptr[0] + key_len[0] + 1;
key_ptr[2] = key_ptr[1] + key_len[1] + 1;
key_ptr[3] = key_ptr[2] + key_len[2] + 1;

for (; srclen; srclen--, src++)
{
   unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/


   dummy[0] = *src;


   if (decomposed_len)


   {


       for (i = 0; i < decomposed_len; i++)


       {


           WCHAR wch = dummy[i];


           unsigned int ce;



           if ((flags & NORM_IGNORESYMBOLS) &&


               (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))


               continue;



           if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );



           ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];


           if (ce != (unsigned int)-1)


           {


               WCHAR key;


               if ((key = ce >> 16))


               {


                   *key_ptr[0]++ = key >> 8;


                   *key_ptr[0]++ = key & 0xff;


               }


               /* make key 1 start from 2 */


               if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;


               /* make key 2 start from 2 */


               if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;


               /* key 3 is always a character code */


               if (ce & 1)


               {


                   if (wch >> 8) *key_ptr[3]++ = wch >> 8;


                   if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;


               }


           }


           else


           {


               *key_ptr[0]++ = 0xff;


               *key_ptr[0]++ = 0xfe;


               if (wch >> 8) *key_ptr[0]++ = wch >> 8;


               if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;


           }


       }


   }


}

*key_ptr[0] = 1;
*key_ptr[1] = 1;
*key_ptr[2] = 1;
*key_ptr[3]++ = 1;
*key_ptr[3] = 0;
return key_ptr[3] - dst;

-}


/* compose a full-width katakana. return consumed source characters. */
static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst )
{

@@ -2574,6 +2453,358 @@ static int compare_weights(int flags, const WCHAR *str1, int len1,
      return len1 - len2;
  }
+/* Start sortkey handler code. */



+/* Defines */



+#define JAPANESE 3 /* script_member value dispatched by sortkey_handle_character(); handling is still a TODO */
+#define MIN_WEIGHT 2 /* placeholder diacritic/case weight; trailing diacritic weights <= this are trimmed from the key */
+#define LIST_STACK_BUFFER 1000 /* byte size of the inline buffer inside each list, and of its first heap block */



+/* Internal structures */
Are these comments useful?
...



+typedef struct _character_info
+{

/* Per-character weights unpacked by get_char() from one 32-bit sort.keys[] entry. */
BYTE weight_primary; /* bits 0-7 */
BYTE script_member; /* bits 8-15; get_char() reports failure when this is 0 */
BYTE weight_diacritic; /* bits 16-23 */
BYTE weight_case; /* bits 24-31 */

+} character_info;



I get the impression that typedefs have largely fallen out of favour.
...
+typedef struct _weight_main_info
+{

/* One main-level weight; main_weights_add() emits the members in this order. */
BYTE script_member; /* first byte written */
BYTE weight_primary; /* second byte written */
BYTE extra; /* optional third byte, written only when non-zero */

+} weight_main_info;



+typedef struct _list
+{

/* Growable array of fixed-size elements: the first elements live in `buffer`,
 * later ones in the heap block `extra` (see LIST_GET / LIST_ADD). */
int extra_len; /* size in bytes of the heap block `extra` (0 until first overflow) */
int len; /* number of elements currently stored */
BYTE buffer[LIST_STACK_BUFFER]; /* inline storage */
int buffer_count; /* LIST_STACK_BUFFER / element_size, set by LIST_INIT */
BYTE* extra; /* heap overflow storage, allocated by LIST_ADD, released by LIST_DESTROY */
int element_size; /* size in bytes of one element */

+} list;



+typedef struct _sortkey_data
+{

/* Working state for building one sort key. */
int flags; /* mapping flags (NORM_IGNORE*) as passed to sortkey_generate() */
list key; /* finished key bytes, assembled by sortkey_write_result() */
list weights_main; /* main-level weight bytes collected per character */
list weights_diacritic; /* diacritic-level weight bytes */
list weights_case; /* case-level weight bytes */

+} sortkey_data;



+/* List functions */



+static void LIST_INIT(list* name, int type_size)
+{

name->extra_len = 0;
name->len = 0;
name->extra = 0;
name->buffer_count = LIST_STACK_BUFFER / type_size;
name->element_size = type_size;

+}



+static void LIST_DESTROY(list* name)
+{

RtlFreeHeap(GetProcessHeap(), 0, name->extra);

+}



+static void* LIST_GET(list* name, int index)
+{

if ((index + 1) * name->element_size <= LIST_STACK_BUFFER)
   return &name->buffer[index * name->element_size];


else
   return &name->extra[index * name->element_size - name->buffer_count];



+}



+/* Add entry to list, resizing as needed */
+static void LIST_ADD(list* name, const void *value)
+{

void* entry;
if ((name->len + 1) * name->element_size > name->extra_len + LIST_STACK_BUFFER)
{
   if (!name->extra) /* First allocation */


   {


       name->extra_len = LIST_STACK_BUFFER;


       name->extra = RtlAllocateHeap(GetProcessHeap(), 0, name->extra_len);


   }


   else


   {


       name->extra_len *= 2;


       name->extra = RtlReAllocateHeap(GetProcessHeap(), 0,name->extra, name->extra_len);


   }


}
entry = LIST_GET(name, name->len);
memcpy(entry, value, name->element_size);
name->len++;

+}



/* Append a weight list to the sortkey.
 *
 *   data                  pointer to the sortkey_data being built
 *   weights               name of the list member of *data to copy from
 *   type                  element type stored in that list
 *   statement_get_value   expression yielding a pointer to the bytes to add;
 *                         may refer to `element`, the current list entry
 *   statement_is_ignored  expression over `element`; trailing entries for
 *                         which it is true are not copied
 *
 * Expands to a single statement with no trailing semicolon, so it can be used
 * like a function call, including as the body of an if/else. */
#define APPEND_LIST_TO_SORTKEY(data, weights, type, statement_get_value, statement_is_ignored) \
    do {                                                                \
        int z;                                                          \
        int end = (data)->weights.len - 1;                              \
        while (end >= 0)                                                \
        {                                                               \
            const type* element = LIST_GET(&(data)->weights, end);      \
            (void)element;                                              \
            if (!(statement_is_ignored)) break;                         \
            end--;                                                      \
        }                                                               \
        for (z = 0; z <= end; z++)                                      \
        {                                                               \
            const type* element = LIST_GET(&(data)->weights, z);        \
            LIST_ADD(&(data)->key, (statement_get_value));              \
        }                                                               \
    } while (0)


+/* Helper functions */



+static BOOL get_char(sortkey_data* data, character_info* info, WCHAR ch)
+{

DWORD value = sort.keys[ch];

info->weight_case = value >> 24;
info->weight_diacritic = (value >> 16) & 0xff;
info->script_member = (value >> 8) & 0xff;
info->weight_primary = value & 0xff;
return info->script_member != 0;

+}



+static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale, BOOL is_compare_string)
+{

data->flags = flags;
LIST_INIT(&data->key, sizeof(BYTE));
LIST_INIT(&data->weights_main, sizeof(BYTE));
LIST_INIT(&data->weights_diacritic, sizeof(BYTE));
LIST_INIT(&data->weights_case, sizeof(BYTE));

+}



+static void sortkey_data_destroy(sortkey_data* data)
+{

LIST_DESTROY(&data->key);
LIST_DESTROY(&data->weights_main);
LIST_DESTROY(&data->weights_diacritic);
LIST_DESTROY(&data->weights_case);

+}



+static weight_main_info create_weight_main(BYTE script_member, BYTE weight_primary)
+{

weight_main_info ret = { 0 };
ret.script_member = script_member;
ret.weight_primary = weight_primary;
return ret;

+}



+static void case_weights_add(sortkey_data* data, BYTE value)
+{

int flags = data->flags;
if (NORM_IGNORECASE & flags)
   value = value & ~(16 + 8);


if (NORM_IGNOREWIDTH & flags)
   value = value & ~(1);


if (NORM_IGNOREKANATYPE & flags)
   value = value & ~(32);



LIST_ADD(&data->weights_case, &value);

+}



+static void main_weights_add(sortkey_data *data, weight_main_info* value)
+{

LIST_ADD(&data->weights_main, &value->script_member);
LIST_ADD(&data->weights_main, &value->weight_primary);
if (value->extra > 0)
   LIST_ADD(&data->weights_main, &value->extra);



+}



+static void diacritic_weights_add(sortkey_data* data, const character_info* info, BYTE value)
+{

LIST_ADD(&data->weights_diacritic, &value);

+}



+/* Main sortkey logic */



+static void sortkey_handle_default_character(sortkey_data* data, WCHAR c)
+{

weight_main_info weightmain;
character_info info;

if (!get_char(data, &info, c))
{
   return;


}

weightmain = create_weight_main(info.script_member, info.weight_primary);
if (info.script_member >= 0xa9 && info.script_member <= 0xaf) /* Some CJK have extra value */
   weightmain.extra = info.weight_diacritic;


else
   diacritic_weights_add(data, &info, info.weight_diacritic);



main_weights_add(data, &weightmain);

case_weights_add(data, info.weight_case);

+}



+static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* str, int i)
+{

weight_main_info weightmain;
character_info info;
int flags = data->flags;

if (!get_char(data, &info, c))
{
   return FALSE;


}

switch (info.script_member)
{
case 0: /* Not sorted */
   break;



case 1:
   if (data->weights_diacritic.len > 0)


   {


       BYTE* entry = LIST_GET(&data->weights_diacritic, data->weights_diacritic.len - 1);


       *entry += info.weight_diacritic; /* Overflow can happen, that's okay */


   }


   else


       diacritic_weights_add(data, &info, info.weight_diacritic);


   break;



case JAPANESE:
   /* TODO */


   break;



case 4: /* Jamo */
   weightmain = create_weight_main(info.weight_primary, info.weight_diacritic);


   main_weights_add(data, &weightmain);



   diacritic_weights_add(data, &info, MIN_WEIGHT);



   case_weights_add(data, info.weight_case);


   break;



case 5:
   weightmain = create_weight_main(253, 255);


   main_weights_add(data, &weightmain);



   weightmain = create_weight_main(info.weight_primary, info.weight_diacritic);


   main_weights_add(data, &weightmain);



   diacritic_weights_add(data, &info, MIN_WEIGHT);



   case_weights_add(data, MIN_WEIGHT);


   break;



case 6: /* Punctuation */
   /* TODO */


   break;



case 7:  /* Symbols */
case 8:  /* Symbols */
case 9:  /* Symbols */
case 10: /* Symbols */
case 11: /* Symbols */
case 12: /* Symbols */
   if (flags & NORM_IGNORESYMBOLS)


       break;



   weightmain = create_weight_main(info.script_member, info.weight_primary);


   main_weights_add(data, &weightmain);



   diacritic_weights_add(data, &info, info.weight_diacritic);



   case_weights_add(data, info.weight_case);


   break;



default:
   sortkey_handle_default_character(data, c);


   break;



The fact that exactly one of these integer cases has a symbolic constant 
attached seems less than ideal.
...

}
return TRUE;

+}



+static void sortkey_write_result(sortkey_data* data)
+{

int flags = data->flags;

const BYTE SORTKEY_SEPARATOR = 1;
const BYTE SORTKEY_TERMINATOR = 0;

/* Main weights */

APPEND_LIST_TO_SORTKEY(data, weights_main, BYTE, element, FALSE);

LIST_ADD(&data->key, &SORTKEY_SEPARATOR);

/* Diacritic weights */

if ((flags & NORM_IGNORENONSPACE) == 0)
{
   APPEND_LIST_TO_SORTKEY(data, weights_diacritic, BYTE, element, *element <= MIN_WEIGHT);


}

LIST_ADD(&data->key, &SORTKEY_SEPARATOR);

/* Case weights */
if ((NORM_IGNORECASE & flags) == 0 || (NORM_IGNOREWIDTH & flags) == 0)
{
   APPEND_LIST_TO_SORTKEY(data, weights_case, BYTE, element, FALSE);


}

LIST_ADD(&data->key,  &SORTKEY_SEPARATOR);

/* Extra weights */
/* TODO */

LIST_ADD(&data->key, &SORTKEY_SEPARATOR);

/* Special weights */
/* TODO */

LIST_ADD(&data->key, &SORTKEY_TERMINATOR);

+}



+static int sortkey_generate(int flags, const WCHAR* locale, const WCHAR* str, int str_len, BYTE* buffer, int buffer_len)
+{

int i;
sortkey_data data;
int ret = 0;

sortkey_data_init(&data, flags, locale, FALSE);

if (str_len == -1)
   str_len = wcslen(str);



for (i = 0; i < str_len; i++)
{
   sortkey_handle_character(&data, str[i], str, i);


}

sortkey_write_result(&data);

if (data.key.len <= buffer_len)
{
   for (i = 0; i < data.key.len; i++)


   {


       BYTE* value = LIST_GET(&data.key, i);


       buffer[i] = *value;


   }


   ret = data.key.len;


}
else if (!buffer)
{
   ret = data.key.len;


}
sortkey_data_destroy(&data);
return ret;

+}



+/* End sortkey handler code */
static const struct geoinfo *get_geoinfo_ptr( GEOID geoid )
  {
@@ -4964,8 +5195,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
          TRACE( "(%s,0x%08x,%s,%d,%p,%d)\n",
                 debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );

   if ((ret = get_sortkey( flags, src, srclen, (char *)dst, dstlen ))) ret++;


   else SetLastError( ERROR_INSUFFICIENT_BUFFER );




   if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))


       SetLastError( ERROR_INSUFFICIENT_BUFFER );
    return ret;
}




--
2.26.2

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004

2003

2002

2001

Re: [PATCH 1/5] kernelbase/locale: Implement sortkey generation on official tables

Signed-off-by: Fabian Maurer <dark.shadow4@web.de>