From: Davide Beatrici git@davidebeatrici.dev
I originally wanted to add these utils into "unicode.h", but it cannot be used with msvcrt headers.
This prompted the creation of a new header called "str.h". The name is generic and broad, but the file may grow to contain more general stuff in the future.
The main rationale for the UTF helpers is to reduce, at least in part, the complexity of converting between UTF-8 and UTF-16 through the Win32 API.
WideCharToMultiByte() and MultiByteToWideChar() in particular are error-prone, because they measure char (8-bit) buffers in bytes but WCHAR (16-bit) buffers in characters.
There are already several wrappers around the codebase, mine should be able to replace them.
---
 include/wine/str.h | 292 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 292 insertions(+)
 create mode 100644 include/wine/str.h

diff --git a/include/wine/str.h b/include/wine/str.h
new file mode 100644
index 00000000000..82990efe94b
--- /dev/null
+++ b/include/wine/str.h
@@ -0,0 +1,292 @@
+/*
+ * Wine internal string utilities
+ *
+ * Copyright 2022 Davide Beatrici
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __WINE_WINE_STR_H
+#define __WINE_WINE_STR_H
+
+#include <string.h>
+#include <wchar.h>
+
+#include "heap.h"
+
+#include <windef.h>
+#include <winbase.h>
+#include <winnls.h>
+
+/*
+ * Conventions shared by the helpers in this header:
+ *   - a "size" is a number of bytes, for char and WCHAR buffers alike;
+ *   - a "len" is a count that excludes the terminator (WCHARs for
+ *     utf16len(), code points for utf8len());
+ *   - a source size of 0 means "src is NUL-terminated, measure it".
+ *
+ * utf8size(), utf16len() and utf16size() evaluate their argument twice, so
+ * do not pass them expressions with side effects.
+ */
+
+/* Bytes occupied by a NUL-terminated char string, terminator included (0 for NULL). */
+#define utf8size(str) ((str) ? strlen(str) + 1 : 0)
+
+/* WCHARs in a NUL-terminated WCHAR string, terminator excluded (0 for NULL). */
+#define utf16len(str) ((str) ? wcslen(str) : 0)
+/* Bytes occupied by a NUL-terminated WCHAR string, terminator included (0 for NULL). */
+#define utf16size(str) ((str) ? (wcslen(str) + 1) * sizeof(WCHAR) : 0)
+
+/* UTF-8 shorthands for the codepage-generic converters defined below. */
+#define utf16to8(dst, dst_size, src, src_size) (utf16to(CP_UTF8, (dst), (dst_size), (src), (src_size)))
+#define utf8to16(dst, dst_size, src, src_size) (toutf16(CP_UTF8, (dst), (dst_size), (src), (src_size)))
+
+#define utf16to8_alloc(src, src_size) (utf16to_alloc(CP_UTF8, (src), (src_size)))
+#define utf8to16_alloc(src, src_size) (toutf16_alloc(CP_UTF8, (src), (src_size)))
+
+/*
+ * Count the code points in a NUL-terminated UTF-8 string.
+ *
+ * Only the shape of each sequence is checked (a lead byte followed by the
+ * right number of 10xxxxxx continuation bytes); overlong forms and encoded
+ * surrogates are not rejected. Counting stops at the first malformed or
+ * truncated sequence and the length of the valid prefix is returned.
+ * Returns 0 for NULL.
+ */
+static inline SIZE_T utf8len(const char *str)
+{
+    const unsigned char *p = (const unsigned char *)str;
+    SIZE_T length = 0;
+    unsigned int trail, i;
+
+    if (!p)
+        return 0;
+
+    while (*p)
+    {
+        if (*p < 0x80)
+            trail = 0;
+        else if ((*p & 0xe0) == 0xc0)
+            trail = 1;
+        else if ((*p & 0xf0) == 0xe0)
+            trail = 2;
+        else if ((*p & 0xf8) == 0xf0)
+            trail = 3;
+        else
+            return length;
+
+        /* Test each continuation byte before stepping over it. A NUL fails
+         * the test as well, so a truncated sequence can never carry the
+         * pointer past the terminator. */
+        for (i = 1; i <= trail; i++)
+        {
+            if ((p[i] & 0xc0) != 0x80)
+                return length;
+        }
+
+        p += trail + 1;
+        ++length;
+    }
+
+    return length;
+}
+
+/*
+ * Duplicate a char buffer into memory obtained from heap_alloc().
+ *
+ * size is in bytes. 0 means src is NUL-terminated and is copied together
+ * with its terminator; any other value is copied verbatim, so the copy is
+ * only terminated if those bytes contain a terminator.
+ * Returns NULL if src is NULL or the allocation fails.
+ */
+static inline char *utf8dup(const char *src, SIZE_T size)
+{
+    char *dst;
+
+    if (!src)
+        return NULL;
+
+    if (size == 0)
+        size = utf8size(src);
+
+    dst = heap_alloc(size);
+    if (!dst)
+        return NULL;
+
+    memcpy(dst, src, size);
+
+    return dst;
+}
+
+/*
+ * Duplicate a WCHAR buffer into memory obtained from heap_alloc().
+ *
+ * size is in bytes, not WCHARs. 0 means src is NUL-terminated and is copied
+ * together with its terminator; any other value is copied verbatim.
+ * Returns NULL if src is NULL or the allocation fails.
+ */
+static inline WCHAR *utf16dup(const WCHAR *src, SIZE_T size)
+{
+    WCHAR *dst;
+
+    if (!src)
+        return NULL;
+
+    if (size == 0)
+        size = utf16size(src);
+
+    dst = heap_alloc(size);
+    if (!dst)
+        return NULL;
+
+    memcpy(dst, src, size);
+
+    return dst;
+}
+
+/*
+ * Convert a UTF-16 string to the multibyte encoding selected by codepage.
+ *
+ * dst_size and src_size are both in bytes. A src_size of 0 means src is
+ * NUL-terminated; its terminator is then converted too. When dst is NULL or
+ * dst_size is 0 nothing is written and the required size is returned.
+ * Returns the number of bytes written (or required), or 0 if src is NULL or
+ * the conversion fails.
+ */
+static inline UINT32 utf16to(const UINT32 codepage,
+                             char *dst, const UINT32 dst_size,
+                             const WCHAR *src, UINT32 src_size)
+{
+    int ret;
+
+    if (!src)
+        return 0;
+
+    /* WideCharToMultiByte() takes the source length in WCHARs. */
+    if (src_size > 0)
+        src_size /= sizeof(WCHAR);
+    else
+        src_size = utf16len(src) + 1;
+
+    if (dst && dst_size > 0)
+        ret = WideCharToMultiByte(codepage, 0, src, src_size, dst, dst_size, NULL, NULL);
+    else
+        ret = WideCharToMultiByte(codepage, 0, src, src_size, NULL, 0, NULL, NULL);
+
+    return ret > 0 ? ret : 0;
+}
+
+/*
+ * Convert a UTF-16 string into a newly allocated multibyte string.
+ *
+ * src_size is in bytes, 0 meaning src is NUL-terminated. The result is only
+ * NUL-terminated if the converted input included a terminator.
+ * Returns a heap_alloc()'d buffer, or NULL if src is NULL, the conversion
+ * fails or the allocation fails.
+ */
+static inline char *utf16to_alloc(const UINT32 codepage,
+                                  const WCHAR *src, UINT32 src_size)
+{
+    const UINT32 dst_size = utf16to(codepage, NULL, 0, src, src_size);
+    char *dst;
+
+    if (dst_size == 0)
+        return NULL;
+
+    dst = heap_alloc(dst_size);
+    if (!dst)
+        return NULL;
+
+    if (utf16to(codepage, dst, dst_size, src, src_size) == 0)
+    {
+        heap_free(dst);
+        return NULL;
+    }
+
+    return dst;
+}
+
+/*
+ * Convert a multibyte string in the given codepage to UTF-16.
+ *
+ * dst_size and src_size are both in bytes. A src_size of 0 means src is
+ * NUL-terminated; its terminator is then converted too. When dst is NULL or
+ * dst_size is 0 nothing is written and the required size is returned.
+ * Returns the number of bytes written (or required), or 0 if src is NULL,
+ * dst cannot hold a single WCHAR or the conversion fails.
+ */
+static inline UINT32 toutf16(const UINT32 codepage,
+                             WCHAR *dst, UINT32 dst_size,
+                             const char *src, UINT32 src_size)
+{
+    int ret;
+
+    if (!src)
+        return 0;
+
+    if (src_size == 0)
+        src_size = strlen(src) + 1;
+
+    if (dst && dst_size > 0)
+    {
+        /* MultiByteToWideChar() takes the destination capacity in WCHARs.
+         * A capacity that rounds down to 0 must be rejected here: passed on,
+         * it would turn the call into a size query that writes nothing yet
+         * reports success. */
+        dst_size /= sizeof(WCHAR);
+        if (dst_size == 0)
+            return 0;
+
+        ret = MultiByteToWideChar(codepage, 0, src, src_size, dst, dst_size);
+    }
+    else
+    {
+        ret = MultiByteToWideChar(codepage, 0, src, src_size, NULL, 0);
+    }
+
+    return ret > 0 ? ret * sizeof(WCHAR) : 0;
+}
+
+/*
+ * Convert a multibyte string into a newly allocated UTF-16 string.
+ *
+ * src_size is in bytes, 0 meaning src is NUL-terminated. The result is only
+ * NUL-terminated if the converted input included a terminator.
+ * Returns a heap_alloc()'d buffer, or NULL if src is NULL, the conversion
+ * fails or the allocation fails.
+ */
+static inline WCHAR *toutf16_alloc(const UINT32 codepage,
+                                   const char *src, UINT32 src_size)
+{
+    const UINT32 dst_size = toutf16(codepage, NULL, 0, src, src_size);
+    WCHAR *dst;
+
+    if (dst_size == 0)
+        return NULL;
+
+    dst = heap_alloc(dst_size);
+    if (!dst)
+        return NULL;
+
+    if (toutf16(codepage, dst, dst_size, src, src_size) == 0)
+    {
+        heap_free(dst);
+        return NULL;
+    }
+
+    return dst;
+}
+
+#endif