I originally wanted to add these utils into `unicode.h`, but it cannot be used with msvcrt headers.
This prompted the creation of a new header called `str.h`. The name is generic and broad, but the file may grow to contain more general stuff in the future.
The rationale behind writing the UTF helpers is mainly to solve, at least in part, the complexity.
`WideCharToMultiByte()` and `MultiByteToWideChar()` in particular are quite dangerous as they treat the buffer size as bytes for `char` (8 bits) and as number of characters for `WCHAR` (16 bits).
There are already several wrappers around the codebase, mine should be able to replace them.
From: Davide Beatrici git@davidebeatrici.dev
I originally wanted to add these utils into "unicode.h", but it cannot be used with msvcrt headers.
This prompted the creation of a new header called "str.h". The name is generic and broad, but the file may grow to contain more general stuff in the future.
The rationale behind writing the UTF helpers is mainly to solve, at least in part, the complexity.
WideCharToMultiByte() and MultiByteToWideChar() in particular are quite dangerous as they treat the buffer size as bytes for char (8 bits) and as number of characters for WCHAR (16 bits).
There are already several wrappers around the codebase, mine should be able to replace them. --- include/wine/str.h | 193 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 include/wine/str.h
diff --git a/include/wine/str.h b/include/wine/str.h new file mode 100644 index 00000000000..82990efe94b --- /dev/null +++ b/include/wine/str.h @@ -0,0 +1,193 @@ +/* + * Wine internal string utilities + * + * Copyright 2022 Davide Beatrici + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __WINE_WINE_STR_H +#define __WINE_WINE_STR_H + +#include "heap.h" + +#include <windef.h> +#include <winbase.h> +#include <winnls.h> + +#define utf8size(str) (str ? strlen(str) + 1 : 0) + +#define utf16len(str) (str ? wcslen(str) : 0) +#define utf16size(str) (str ? (wcslen(str) + 1) * sizeof(WCHAR) : 0) + +#define utf16to8(dst, dst_size, src, src_size) (utf16to(CP_UTF8, dst, dst_size, src, src_size)) +#define utf8to16(dst, dst_size, src, src_size) (toutf16(CP_UTF8, dst, dst_size, src, src_size)) + +#define utf16to8_alloc(src, src_size) (utf16to_alloc(CP_UTF8, src, src_size)) +#define utf8to16_alloc(src, src_size) (toutf16_alloc(CP_UTF8, src, src_size)) + +static inline SIZE_T utf8len(const char *str) { + SIZE_T length = 0; + + if (!str) + return 0; + + while (*str != '\0') + { + if ((*str & 0x80) == 0x00) + str += 1; + else if ((*str & 0xe0) == 0xc0) + str += 2; + else if ((*str & 0xf0) == 0xe0) + str += 3; + else if ((*str & 0xf8) == 0xf0) + str += 4; + else + // Corrupt string, return the valid part's length. + return length; + + ++length; + } + + return length; +} + +static inline char *utf8dup(const char *src, SIZE_T size) +{ + char *dst; + + if (!src) + return NULL; + + if (size == 0) + size = utf8size(src); + + dst = heap_alloc(size); + + memcpy(dst, src, size); + + return dst; +} + +static inline WCHAR *utf16dup(const WCHAR *src, SIZE_T size) +{ + WCHAR *dst; + + if (!src) + return NULL; + + if (size == 0) + size = utf16size(src); + + dst = heap_alloc(size); + + memcpy(dst, src, size); + + return dst; +} + +static inline UINT32 utf16to(const UINT32 codepage, + char *dst, const UINT32 dst_size, + const WCHAR *src, UINT32 src_size) +{ + int ret; + + if (!src) + return 0; + + if (src_size > 0) + src_size /= sizeof(WCHAR); + else + src_size = utf16len(src) + 1; + + if (dst && dst_size > 0) + { + ret = WideCharToMultiByte(codepage, 0, src, src_size, dst, dst_size, NULL, NULL); + } + else + { + ret = WideCharToMultiByte(codepage, 0, src, src_size, NULL, 0, NULL, NULL); + } + + return ret > 0 ? ret : 0; +} + +static inline char *utf16to_alloc(const UINT32 codepage, + const WCHAR *src, UINT32 src_size) +{ + char *dst; + + const UINT32 dst_size = utf16to(codepage, NULL, 0, src, src_size); + if (dst_size == 0) + return NULL; + + dst = heap_alloc(dst_size); + + if (utf16to(codepage, dst, dst_size, src, src_size) == 0) + { + heap_free(dst); + return NULL; + } + + return dst; +} + +static inline UINT32 toutf16(const UINT32 codepage, + WCHAR *dst, UINT32 dst_size, + const char *src, UINT32 src_size) +{ + int ret; + + if (!src) + return 0; + + if (src_size == 0) + src_size = strlen(src) + 1; + + if (dst && dst_size > 0) + { + dst_size /= sizeof(WCHAR); + + ret = MultiByteToWideChar(codepage, 0, src, src_size, dst, dst_size); + } + else + { + ret = MultiByteToWideChar(codepage, 0, src, src_size, NULL, 0); + } + + return ret > 0 ? ret * sizeof(WCHAR) : 0; +} + +static inline WCHAR *toutf16_alloc(const UINT32 codepage, + const char *src, UINT32 src_size) +{ + WCHAR *dst; + + const UINT32 dst_size = toutf16(codepage, NULL, 0, src, src_size); + if (dst_size == 0) + return NULL; + + dst = heap_alloc(dst_size); + + if (toutf16(codepage, dst, dst_size, src, src_size) == 0) + { + heap_free(dst); + return NULL; + } + + return dst; +} + +#endif
From: Davide Beatrici git@davidebeatrici.dev
--- programs/winemenubuilder/winemenubuilder.c | 71 +++++++--------------- 1 file changed, 22 insertions(+), 49 deletions(-)
diff --git a/programs/winemenubuilder/winemenubuilder.c b/programs/winemenubuilder/winemenubuilder.c index 1579ca8dafa..41ed9e2286e 100644 --- a/programs/winemenubuilder/winemenubuilder.c +++ b/programs/winemenubuilder/winemenubuilder.c @@ -84,8 +84,10 @@ #include <wincodec.h>
#include "wine/debug.h" +#include "wine/heap.h" #include "wine/list.h" #include "wine/rbtree.h" +#include "wine/str.h"
WINE_DEFAULT_DEBUG_CHANNEL(menubuilder);
@@ -94,6 +96,11 @@ WINE_DEFAULT_DEBUG_CHANNEL(menubuilder); #define in_startmenu(csidl) ((csidl)==CSIDL_STARTMENU || \ (csidl)==CSIDL_COMMON_STARTMENU)
+#define wchars_to_utf8_chars(wchars) (utf16to8_alloc(wchars, 0)) +#define utf8_chars_to_wchars(chars) (utf8to16_alloc(chars, 0)) + +#define xwcsdup(wchars) (utf16dup(wchars, 0)) + #define IS_OPTION_TRUE(ch) \ ((ch) == 'y' || (ch) == 'Y' || (ch) == 't' || (ch) == 'T' || (ch) == '1')
@@ -208,7 +215,7 @@ static unsigned short crc16(const WCHAR *string)
static void *xmalloc( size_t size ) { - void *ret = HeapAlloc( GetProcessHeap(), 0, size ); + void *ret = heap_alloc( size ); if (!ret) { ERR( "out of memory\n" ); @@ -219,8 +226,7 @@ static void *xmalloc( size_t size )
static void *xrealloc( void *ptr, size_t size ) { - if (!ptr) return xmalloc( size ); - ptr = HeapReAlloc( GetProcessHeap(), 0, ptr, size ); + ptr = heap_realloc( ptr, size ); if (!ptr) { ERR( "out of memory\n" ); @@ -229,21 +235,6 @@ static void *xrealloc( void *ptr, size_t size ) return ptr; }
-static WCHAR *xwcsdup( const WCHAR *str ) -{ - WCHAR *ret; - - if (!str) return NULL; - ret = xmalloc( (lstrlenW(str) + 1) * sizeof(WCHAR) ); - lstrcpyW( ret, str ); - return ret; -} - -static void heap_free( void *ptr ) -{ - HeapFree( GetProcessHeap(), 0, ptr ); -} - static WCHAR * WINAPIV heap_wprintf(const WCHAR *format, ...) { va_list args; @@ -297,29 +288,11 @@ static BOOL create_directories(WCHAR *directory) return CreateDirectoryW( directory, NULL ) || GetLastError() == ERROR_ALREADY_EXISTS; }
-static char* wchars_to_utf8_chars(LPCWSTR string) -{ - char *ret; - INT size = WideCharToMultiByte(CP_UTF8, 0, string, -1, NULL, 0, NULL, NULL); - ret = xmalloc(size); - WideCharToMultiByte(CP_UTF8, 0, string, -1, ret, size, NULL, NULL); - return ret; -} - -static WCHAR* utf8_chars_to_wchars(LPCSTR string) -{ - WCHAR *ret; - INT size = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0); - ret = xmalloc(size * sizeof(WCHAR)); - MultiByteToWideChar(CP_UTF8, 0, string, -1, ret, size); - return ret; -} - static char *wchars_to_xml_text(const WCHAR *string) { int i, pos; char *text = wchars_to_utf8_chars( string ); - char *ret = xmalloc( 6 * strlen(text) + 1 ); + char *ret = xmalloc( 6 * utf8size(text) );
for (i = pos = 0; text[i]; i++) { @@ -1056,7 +1029,7 @@ static WCHAR *compute_native_identifier(int exeIndex, LPCWSTR icoPathW, LPCWSTR if (basename == NULL) basename = icoPathW; else basename++; ext = wcsrchr(basename, '.'); - if (!ext) ext = basename + lstrlenW(basename); + if (!ext) ext = basename + utf16len(basename);
return heap_wprintf(L"%04X_%.*s.%d", crc, (int)(ext - basename), basename, exeIndex); } @@ -1200,7 +1173,7 @@ static DWORD register_menus_entry(const WCHAR *menu_file, const WCHAR *windows_f if (hkey) { ret = RegSetValueExW(hkey, menu_file, 0, REG_SZ, (const BYTE*)windows_file, - (lstrlenW(windows_file) + 1) * sizeof(WCHAR)); + utf16size(windows_file)); RegCloseKey(hkey); } else @@ -1215,7 +1188,7 @@ static LPSTR escape(LPCWSTR arg) WCHAR *escaped_string; char *utf8_string;
- escaped_string = xmalloc((4 * lstrlenW(arg) + 1) * sizeof(WCHAR)); + escaped_string = xmalloc(4 * utf16size(arg)); for (i = j = 0; arg[i]; i++) { switch (arg[i]) @@ -1406,7 +1379,7 @@ static BOOL write_menu_file(const WCHAR *windows_link, const WCHAR *link) fprintf(tempfile, "</Menu>\n");
menuPath = heap_wprintf(L"%s\%s", xdg_menu_dir, filename); - lstrcpyW(menuPath + lstrlenW(menuPath) - lstrlenW(L".desktop"), L".menu"); + lstrcpyW(menuPath + utf16len(menuPath) - utf16len(L".desktop"), L".menu");
fclose(tempfile); ret = MoveFileExW( tempfilename, menuPath, MOVEFILE_REPLACE_EXISTING ); @@ -1498,7 +1471,7 @@ static BOOL get_link_location( LPCWSTR linkfile, DWORD *loc, WCHAR **relative ) if (!SHGetSpecialFolderPathW( 0, buffer, locations[i], FALSE )) continue;
- len = lstrlenW(buffer); + len = utf16len(buffer); if (len >= MAX_PATH) continue; /* We've just trashed memory! Hopefully we are OK */
@@ -1760,10 +1733,10 @@ static WCHAR *freedesktop_mime_type_for_extension(struct list *native_mime_types { if (PathMatchSpecW( extensionW, mime_type_entry->glob )) { - if (match == NULL || matchLength < lstrlenW(mime_type_entry->glob)) + if (match == NULL || matchLength < utf16len(mime_type_entry->glob)) { match = mime_type_entry->mimeType; - matchLength = lstrlenW(mime_type_entry->glob); + matchLength = utf16len(mime_type_entry->glob); } } } @@ -1879,12 +1852,12 @@ static void update_association(LPCWSTR extension, const WCHAR *mimeType, const W goto done; }
- RegSetValueExW(subkey, L"MimeType", 0, REG_SZ, (const BYTE*) mimeType, (lstrlenW(mimeType) + 1) * sizeof(WCHAR)); - RegSetValueExW(subkey, L"ProgID", 0, REG_SZ, (const BYTE*) progId, (lstrlenW(progId) + 1) * sizeof(WCHAR)); - RegSetValueExW(subkey, L"AppName", 0, REG_SZ, (const BYTE*) appName, (lstrlenW(appName) + 1) * sizeof(WCHAR)); - RegSetValueExW(subkey, L"DesktopFile", 0, REG_SZ, (const BYTE*) desktopFile, (lstrlenW(desktopFile) + 1) * sizeof(WCHAR)); + RegSetValueExW(subkey, L"MimeType", 0, REG_SZ, (const BYTE*) mimeType, utf16size(mimeType)); + RegSetValueExW(subkey, L"ProgID", 0, REG_SZ, (const BYTE*) progId, utf16size(progId)); + RegSetValueExW(subkey, L"AppName", 0, REG_SZ, (const BYTE*) appName, utf16size(appName)); + RegSetValueExW(subkey, L"DesktopFile", 0, REG_SZ, (const BYTE*) desktopFile, utf16size(desktopFile)); if (openWithIcon) - RegSetValueExW(subkey, L"OpenWithIcon", 0, REG_SZ, (const BYTE*) openWithIcon, (lstrlenW(openWithIcon) + 1) * sizeof(WCHAR)); + RegSetValueExW(subkey, L"OpenWithIcon", 0, REG_SZ, (const BYTE*) openWithIcon, utf16size(openWithIcon)); else RegDeleteValueW(subkey, L"OpenWithIcon");
```c include/wine/str.h:32:29: warning: the address of ‘buffer’ will always evaluate as ‘true’ [-Waddress] 32 | #define utf16len(str) (str ? wcslen(str) : 0) | ^ programs/winemenubuilder/winemenubuilder.c:1474:15: note: in expansion of macro ‘utf16len’ 1474 | len = utf16len(buffer); | ^~~~~~~~ ```
I should probably transform the macros that validate the argument into functions.
What's the best way to test `winemenubuilder` and make sure I didn't break anything?
This doesn't seem like an improvement. Adding non-standard functions with different conventions is not reducing complexity, just the opposite.
This merge request was closed by Alexandre Julliard.
The complexity reduction would be in their usage, not the implementation itself.
As I mentioned in the commit's message, there are already several wrappers for functions such as `WideCharToMultiByte()` and `MultiByteToWideChar()` across the codebase.
If you're referring to `utf8len()`, `utf8size()`, `utf16len()` and `utf16size()`: I can see why they're confusing. We could match the standard naming convention by removing `utf8len()` (which is not used anyway) and renaming:
- `utf8size()` -> `strsize()`. - `utf16size()` -> `wstrsize()`. - `utf16len()` -> `wstrlen()`.
What do you think?
It doesn't reduce complexity in the usage to have two sets of functions that do the same thing only with subtle differences. Obviously you are not going to get rid of the Win32 APIs, so now you'd have to keep both sets of functions in your head to be able to understand the code.
Note also that the vast majority of Unicode functions take character counts, so adding helpers that take byte counts would make it even harder to get right.
It doesn't reduce complexity in the usage to have two sets of functions that do the same thing only with subtle differences. Obviously you are not going to get rid of the Win32 APIs, so now you'd have to keep both sets of functions in your head to be able to understand the code.
My idea was to replace all calls to the other functions, but it would probably be a shock to anyone used to the current code.
In retrospective it's probably not worth it since, as you said, there are only subtle differences and not actual improvements.
Note also that the vast majority of Unicode functions take character counts, so adding helpers that take byte counts would make it even harder to get right.
I actually think it's the opposite, especially when allocating memory: it's very easy to forget to multiply by `sizeof(WCHAR)`.
I'm still convinced the UTF conversion functions provided in this PR would be a great addition, but ideally they should live in `unicode.h` and not a new header.
What's the reason for the header being blocked from coexisting with `msvcrt`? Function naming conflict?
What's the reason for the header being blocked from coexisting with msvcrt? Function naming conflict?
In modules that use msvcrt we can use standard functions like wcslen() and wcscpy(). That's better than inventing our own.
I agree.
Are you interested in `utf16to()` and `toutf16()`? If yes, is there already a header that may be suitable for them?
Are you interested in utf16to() and toutf16()? If yes, is there already a header that may be suitable for them?
No, I don't think that's useful.
No problem, thanks for your comments!