[PATCH 0/2] MR10422: win32u: Handle UTF-16 surrogate pairs in GDI text rendering
Fixes Bug 59544. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10422
From: Robert Gerigk <Robert-Gerigk@online.de> --- dlls/gdi32/tests/font.c | 83 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/dlls/gdi32/tests/font.c b/dlls/gdi32/tests/font.c index 81bc5a2f77c..0489ff115b2 100644 --- a/dlls/gdi32/tests/font.c +++ b/dlls/gdi32/tests/font.c @@ -1703,6 +1703,88 @@ static void test_GetGlyphIndices(void) ok(ret, "Failed to delete font file, %ld.\n", GetLastError()); } +static void test_GetGlyphIndices_surrogate_pairs(void) +{ + HDC hdc; + HFONT hfont, hfont_old; + LOGFONTA lf; + DWORD count; + WORD glyphs[4]; + /* U+1F600 GRINNING FACE = D83D DE00 as UTF-16 surrogate pair */ + static const WCHAR surrogate_str[] = { 0xd83d, 0xde00, 0 }; + /* U+1F604 = D83D DE04 */ + static const WCHAR two_surrogates[] = { 0xd83d, 0xde00, 0xd83d, 0xde04 }; + /* Lone high surrogate followed by BMP char (U+23F9, present in emoji fonts) */ + static const WCHAR lone_high[] = { 0xd83d, 0x23f9, 0 }; + /* Lone low surrogate followed by BMP char */ + static const WCHAR lone_low[] = { 0xde00, 0x23f9, 0 }; + + hdc = GetDC(0); + + /* Try Segoe UI Emoji (available on Windows 8.1+) */ + if (!is_font_installed("Segoe UI Emoji")) + { + skip("Segoe UI Emoji not installed, skipping surrogate pair tests\n"); + ReleaseDC(0, hdc); + return; + } + + memset(&lf, 0, sizeof(lf)); + strcpy(lf.lfFaceName, "Segoe UI Emoji"); + lf.lfHeight = 24; + lf.lfCharSet = DEFAULT_CHARSET; + hfont = CreateFontIndirectA(&lf); + ok(hfont != 0, "CreateFontIndirect failed\n"); + hfont_old = SelectObject(hdc, hfont); + + /* Test 1: Surrogate pair should return valid glyph (not .notdef) for both positions. + * Windows maps the combined codepoint and copies the glyph index to both WORD positions. 
*/ + memset(glyphs, 0, sizeof(glyphs)); + count = GetGlyphIndicesW(hdc, surrogate_str, 2, glyphs, GGI_MARK_NONEXISTING_GLYPHS); + ok(count == 2, "expected 2, got %lu\n", count); + ok(glyphs[0] != 0 && glyphs[0] != 0xffff, + "surrogate pair high: expected valid glyph, got %#x\n", glyphs[0]); + ok(glyphs[1] == glyphs[0], + "surrogate pair low: expected same glyph %#x, got %#x\n", glyphs[0], glyphs[1]); + + /* Test 2: Two consecutive surrogate pairs */ + memset(glyphs, 0, sizeof(glyphs)); + count = GetGlyphIndicesW(hdc, two_surrogates, 4, glyphs, GGI_MARK_NONEXISTING_GLYPHS); + ok(count == 4, "expected 4, got %lu\n", count); + ok(glyphs[0] != 0 && glyphs[0] != 0xffff, + "first pair high: expected valid glyph, got %#x\n", glyphs[0]); + ok(glyphs[1] == glyphs[0], + "first pair low: expected same glyph %#x, got %#x\n", glyphs[0], glyphs[1]); + ok(glyphs[2] != 0 && glyphs[2] != 0xffff, + "second pair high: expected valid glyph, got %#x\n", glyphs[2]); + ok(glyphs[3] == glyphs[2], + "second pair low: expected same glyph %#x, got %#x\n", glyphs[2], glyphs[3]); + /* The two emoji should map to different glyphs */ + ok(glyphs[0] != glyphs[2], + "two different emoji should have different glyph indices: %#x vs %#x\n", glyphs[0], glyphs[2]); + + /* Test 3: Lone high surrogate followed by non-surrogate — should NOT combine */ + memset(glyphs, 0, sizeof(glyphs)); + count = GetGlyphIndicesW(hdc, lone_high, 2, glyphs, GGI_MARK_NONEXISTING_GLYPHS); + ok(count == 2, "expected 2, got %lu\n", count); + ok(glyphs[0] == 0xffff || glyphs[0] == 0x001f, + "lone high surrogate: expected .notdef, got %#x\n", glyphs[0]); + ok(glyphs[1] != 0xffff && glyphs[1] != 0, + "BMP char after lone high surrogate: expected valid glyph, got %#x\n", glyphs[1]); + + /* Test 4: Lone low surrogate — should not combine with anything */ + memset(glyphs, 0, sizeof(glyphs)); + count = GetGlyphIndicesW(hdc, lone_low, 2, glyphs, GGI_MARK_NONEXISTING_GLYPHS); + ok(count == 2, "expected 2, got %lu\n", count); + 
ok(glyphs[0] == 0xffff || glyphs[0] == 0x001f, + "lone low surrogate: expected .notdef, got %#x\n", glyphs[0]); + ok(glyphs[1] != 0xffff && glyphs[1] != 0, + "BMP char after lone low surrogate: expected valid glyph, got %#x\n", glyphs[1]); + + DeleteObject(SelectObject(hdc, hfont_old)); + ReleaseDC(0, hdc); +} + static void test_GetKerningPairs(void) { static const struct kerning_data @@ -8037,6 +8119,7 @@ START_TEST(font) test_GetCharABCWidths(); test_text_extents(); test_GetGlyphIndices(); + test_GetGlyphIndices_surrogate_pairs(); test_GetKerningPairs(); test_GetOutlineTextMetrics(); test_GetOutlineTextMetrics_subst(); -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10422
From: Robert Gerigk <Robert-Gerigk@online.de> --- dlls/win32u/font.c | 59 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/dlls/win32u/font.c b/dlls/win32u/font.c index e34cb13d0e5..19ba69f1252 100644 --- a/dlls/win32u/font.c +++ b/dlls/win32u/font.c @@ -4028,6 +4028,10 @@ static DWORD font_GetGlyphIndices( PHYSDEV dev, const WCHAR *str, INT count, WOR { UINT glyph = str[i]; + /* Combine UTF-16 surrogate pairs into a single codepoint */ + if (glyph >= 0xd800 && glyph <= 0xdbff && i + 1 < count && str[i + 1] >= 0xdc00 && str[i + 1] <= 0xdfff) + glyph = 0x10000 + ((glyph - 0xd800) << 10) + (str[i + 1] - 0xdc00); + if (!font_funcs->get_glyph_index( physdev->font, &glyph, TRUE )) { glyph = 0; @@ -4049,6 +4053,13 @@ static DWORD font_GetGlyphIndices( PHYSDEV dev, const WCHAR *str, INT count, WOR gi[i] = default_char; } else gi[i] = get_GSUB_vert_glyph( physdev->font, glyph ); + + /* For surrogate pairs, copy the glyph index to the low surrogate position and advance */ + if (str[i] >= 0xd800 && str[i] <= 0xdbff && i + 1 < count && str[i + 1] >= 0xdc00 && str[i + 1] <= 0xdfff) + { + i++; + gi[i] = gi[i - 1]; + } } pthread_mutex_unlock( &font_lock ); @@ -5529,8 +5540,13 @@ static RECT get_total_extents( HDC hdc, INT x, INT y, UINT flags, UINT aa_flags, for (i = 0; i < count; i++) { GLYPHMETRICS metrics; + UINT ch = str[i]; + + /* Combine UTF-16 surrogate pairs into a single codepoint */ + if (ch >= 0xd800 && ch <= 0xdbff && i + 1 < count && str[i + 1] >= 0xdc00 && str[i + 1] <= 0xdfff) + ch = 0x10000 + ((ch - 0xd800) << 10) + (str[i + 1] - 0xdc00); - if (get_glyph_bitmap( hdc, str[i], flags, aa_flags, &metrics, NULL )) continue; + if (get_glyph_bitmap( hdc, ch, flags, aa_flags, &metrics, NULL )) goto next; rect.left = x + metrics.gmptGlyphOrigin.x; rect.top = y - metrics.gmptGlyphOrigin.y; @@ -5538,6 +5554,7 @@ static RECT get_total_extents( HDC hdc, INT x, INT y, UINT flags, UINT aa_flags, rect.bottom = rect.top 
+ metrics.gmBlackBoxY; add_bounds_rect( &bounds, &rect ); + next: if (dx) { if (flags & ETO_PDY) @@ -5552,6 +5569,21 @@ static RECT get_total_extents( HDC hdc, INT x, INT y, UINT flags, UINT aa_flags, x += metrics.gmCellIncX; y += metrics.gmCellIncY; } + + /* Skip the low surrogate — its dx contribution was already consumed */ + if (str[i] >= 0xd800 && str[i] <= 0xdbff && i + 1 < count && str[i + 1] >= 0xdc00 && str[i + 1] <= 0xdfff) + { + if (dx) + { + if (flags & ETO_PDY) + { + x += dx[ (i + 1) * 2 ]; + y += dx[ (i + 1) * 2 + 1]; + } + else x += dx[ i + 1 ]; + } + i++; + } } return bounds; } @@ -5721,13 +5753,19 @@ BOOL nulldrv_ExtTextOut( PHYSDEV dev, INT x, INT y, UINT flags, const RECT *rect { GLYPHMETRICS metrics; struct gdi_image_bits image; + UINT ch = str[i]; + + /* Combine UTF-16 surrogate pairs into a single codepoint */ + if (ch >= 0xd800 && ch <= 0xdbff && i + 1 < count && str[i + 1] >= 0xdc00 && str[i + 1] <= 0xdfff) + ch = 0x10000 + ((ch - 0xd800) << 10) + (str[i + 1] - 0xdc00); - err = get_glyph_bitmap( dev->hdc, str[i], flags, GGO_BITMAP, &metrics, &image ); - if (err) continue; + err = get_glyph_bitmap( dev->hdc, ch, flags, GGO_BITMAP, &metrics, &image ); + if (err) goto next_glyph; if (image.ptr) draw_glyph( dc, x, y, &metrics, &image, (flags & ETO_CLIPPED) ? rect : NULL ); if (image.free) image.free( &image ); + next_glyph: if (dx) { if (flags & ETO_PDY) @@ -5742,6 +5780,21 @@ BOOL nulldrv_ExtTextOut( PHYSDEV dev, INT x, INT y, UINT flags, const RECT *rect x += metrics.gmCellIncX; y += metrics.gmCellIncY; } + + /* Skip the low surrogate */ + if (str[i] >= 0xd800 && str[i] <= 0xdbff && i + 1 < count && str[i + 1] >= 0xdc00 && str[i + 1] <= 0xdfff) + { + if (dx) + { + if (flags & ETO_PDY) + { + x += dx[ (i + 1) * 2 ]; + y += dx[ (i + 1) * 2 + 1]; + } + else x += dx[ i + 1 ]; + } + i++; + } } NtGdiSelectPen( dev->hdc, orig ); -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10422
I don't think this was ever supported at this level, on Windows. Also it's not clear to me why WPF would be using any of that. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10422#note_133245
On Sun Mar 22 22:14:53 2026 +0000, Nikolay Sivov wrote:
> I don't think this was ever supported at this level, on Windows. Also it's not clear to me why WPF would be using any of that.
You are right. I checked the Windows pipeline output and it confirms your point. GetGlyphIndicesW returns 0xffff for surrogate pairs even with Segoe UI Emoji:
font.c:1745: Test failed: surrogate pair high: expected valid glyph, got 0xffff font.c:1754: Test failed: first pair high: expected valid glyph, got 0xffff font.c:1758: Test failed: second pair high: expected valid glyph, got 0xffff font.c:1763: Test failed: two different emoji should have different glyph indices: 0xffff vs 0xffff So Windows does not handle surrogate pairs at the GDI level in GetGlyphIndicesW. The function treats each WCHAR independently, just like Wine currently does. The reason I initially looked at GDI was that WPF text rendering under Wine falls back to GDI paths (nulldrv_ExtTextOut, get_total_extents), which is where the "two squares per emoji" artifact appears. On Windows, WPF uses DirectWrite for text rendering, so the surrogates are handled at a higher level and never reach GetGlyphIndicesW. I will close this MR and investigate where the fix belongs in the DirectWrite layer (dlls/dwrite/) instead. If there is a better place to look, I would appreciate any pointers. Sorry for the noise. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10422#note_133249
This merge request was closed by Jan Robert Gerigk. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10422
participants (3)
-
Jan Robert Gerigk (@RgSg86) -
Nikolay Sivov (@nsivov) -
Robert Gerigk