This MR enforces alignment of TLS slots as described in PE file.
I'm not 100% happy with the deallocation scheme, as it could be slow. If someone has a better idea, be welcome! Didn't open the option to introduce helpers in heap.c (but maybe that's the way to go).
From: Eric Pouech epouech@codeweavers.com
Signed-off-by: Eric Pouech epouech@codeweavers.com --- dlls/kernel32/tests/loader.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/dlls/kernel32/tests/loader.c b/dlls/kernel32/tests/loader.c index 2c7cc784be4..015a763ea30 100644 --- a/dlls/kernel32/tests/loader.c +++ b/dlls/kernel32/tests/loader.c @@ -2229,6 +2229,7 @@ static void test_import_resolution(void) ADD_RELOC( tls.EndAddressOfRawData ); data.tls.AddressOfIndex = nt.OptionalHeader.ImageBase + DATA_RVA( &data.tls_index ); ADD_RELOC( tls.AddressOfIndex ); + data.tls.Characteristics = IMAGE_SCN_ALIGN_1024BYTES; strcpy( data.tls_data, "hello world" ); data.tls_index = 9999; data.tls_index_hi = 9999; @@ -2292,6 +2293,9 @@ static void test_import_resolution(void) ok( ptr->tls_index < 32, "wrong tls index %d\n", ptr->tls_index ); str = ((char **)NtCurrentTeb()->ThreadLocalStoragePointer)[ptr->tls_index]; ok( !strcmp( str, "hello world" ), "wrong tls data '%s' at %p\n", str, str ); + /* should be aligned on 1024 bytes, 10 lower bits at 0 */ + todo_wine_if( (DWORD_PTR)str & 0x3f ) + ok( !((DWORD_PTR)str & 0x3f), "wrong alignment %p\n", str ); ok(ptr->tls_index_hi == 0, "TLS Index written as a short, high half: %d\n", ptr->tls_index_hi); check_tls_index(mod, ptr->tls_index != 9999); FreeLibrary( mod );
From: Eric Pouech epouech@codeweavers.com
Required for some Chrome integration.
Wine-Bugs: https://bugs.winehq.org/show_bug.cgi?id=57700
Signed-off-by: Eric Pouech epouech@codeweavers.com --- dlls/kernel32/tests/loader.c | 1 - dlls/ntdll/loader.c | 88 +++++++++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 17 deletions(-)
diff --git a/dlls/kernel32/tests/loader.c b/dlls/kernel32/tests/loader.c index 015a763ea30..91c21637f9a 100644 --- a/dlls/kernel32/tests/loader.c +++ b/dlls/kernel32/tests/loader.c @@ -2294,7 +2294,6 @@ static void test_import_resolution(void) str = ((char **)NtCurrentTeb()->ThreadLocalStoragePointer)[ptr->tls_index]; ok( !strcmp( str, "hello world" ), "wrong tls data '%s' at %p\n", str, str ); /* should be aligned on 1024 bytes, 10 lower bits at 0 */ - todo_wine_if( (DWORD_PTR)str & 0x3f ) ok( !((DWORD_PTR)str & 0x3f), "wrong alignment %p\n", str ); ok(ptr->tls_index_hi == 0, "TLS Index written as a short, high half: %d\n", ptr->tls_index_hi); check_tls_index(mod, ptr->tls_index != 9999); diff --git a/dlls/ntdll/loader.c b/dlls/ntdll/loader.c index 0c25fe14133..1768d98e0c3 100644 --- a/dlls/ntdll/loader.c +++ b/dlls/ntdll/loader.c @@ -1270,6 +1270,70 @@ static BOOL is_dll_native_subsystem( LDR_DATA_TABLE_ENTRY *mod, const IMAGE_NT_H return TRUE; }
+ +static void *heap_alloc_aligned( unsigned size, unsigned align ) +{ + void *ptr; + DWORD_PTR mask = ((DWORD_PTR)1 << align) - 1; + + if ((ptr = RtlAllocateHeap( GetProcessHeap(), 0, size )) && ((DWORD_PTR)ptr & mask)) + { + void *new_ptr; + /* if block is not aligned, alloc a greater one which we can realign */ + + if (!(new_ptr = RtlReAllocateHeap( GetProcessHeap(), 0, ptr, mask + size ))) + RtlFreeHeap( GetProcessHeap(), 0, ptr ); + ptr = (void *)(((DWORD_PTR)new_ptr + mask) & ~mask); + } + return ptr; +} + + +/* Allocates the slot from a TLS directory (potentially realigning if necessary) + * - must be freed with heap_free_aligned + */ +static void *alloc_thread_tls_slot( const IMAGE_TLS_DIRECTORY *dir ) +{ + UINT size = dir->EndAddressOfRawData - dir->StartAddressOfRawData; + unsigned align; + void *ptr; + + align = (dir->Characteristics & IMAGE_SCN_ALIGN_MASK) >> 20; + if (!align) align = 5; /* default */ + if ((ptr = heap_alloc_aligned( size + dir->SizeOfZeroFill, align ))) + { + memcpy( ptr, (void *)dir->StartAddressOfRawData, size ); + memset( (char *)ptr + size, 0, dir->SizeOfZeroFill ); + } + + return ptr; +} + + +/* as alloc_thread_tls_slot can realign the allocated block, we need to find back the unaligned allocated block */ +static NTSTATUS heap_free_aligned( void *ptr ) +{ + NTSTATUS status; + + if (RtlLockHeap( GetProcessHeap() )) + { + PROCESS_HEAP_ENTRY entry = {.lpData = NULL}; + while (!(status = RtlWalkHeap( GetProcessHeap(), &entry ))) + { + if ((entry.wFlags & PROCESS_HEAP_ENTRY_BUSY) && + (entry.lpData <= ptr && (DWORD_PTR)ptr < (DWORD_PTR)entry.lpData + entry.cbData)) + { + RtlFreeHeap( GetProcessHeap(), 0, entry.lpData ); + break; + } + } + RtlUnlockHeap( GetProcessHeap() ); + } + else status = STATUS_INVALID_HANDLE; + return status; +} + + /************************************************************************* * alloc_tls_slot * @@ -1348,15 +1412,12 @@ static BOOL alloc_tls_slot( LDR_DATA_TABLE_ENTRY *mod ) /* FIXME: can't free old block here, should be freed at thread exit */ }
- if (!(new_ptr = RtlAllocateHeap( GetProcessHeap(), 0, size + dir->SizeOfZeroFill ))) return -1; - memcpy( new_ptr, (void *)dir->StartAddressOfRawData, size ); - memset( (char *)new_ptr + size, 0, dir->SizeOfZeroFill ); + if (!(new_ptr = alloc_thread_tls_slot( dir ))) return -1;
TRACE( "thread %04lx slot %lu: %lu/%lu bytes at %p\n", HandleToULong(teb->ClientId.UniqueThread), i, size, dir->SizeOfZeroFill, new_ptr );
- RtlFreeHeap( GetProcessHeap(), 0, - InterlockedExchangePointer( (void **)teb->ThreadLocalStoragePointer + i, new_ptr )); + heap_free_aligned( InterlockedExchangePointer( (void **)teb->ThreadLocalStoragePointer + i, new_ptr )); } if (thread) NtClose( thread );
@@ -1569,7 +1630,7 @@ static WINE_MODREF *alloc_module( HMODULE hModule, const UNICODE_STRING *nt_name static NTSTATUS alloc_thread_tls(void) { void **pointers; - UINT i, size; + UINT i;
if (!(pointers = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, tls_module_count * sizeof(*pointers) ))) @@ -1579,20 +1640,15 @@ static NTSTATUS alloc_thread_tls(void) { const IMAGE_TLS_DIRECTORY *dir = &tls_dirs[i];
- if (!dir) continue; - size = dir->EndAddressOfRawData - dir->StartAddressOfRawData; - if (!size && !dir->SizeOfZeroFill) continue; + if (!dir || (!(dir->EndAddressOfRawData - dir->StartAddressOfRawData) && !dir->SizeOfZeroFill)) continue;
- if (!(pointers[i] = RtlAllocateHeap( GetProcessHeap(), 0, size + dir->SizeOfZeroFill ))) + if (!(pointers[i] = alloc_thread_tls_slot( dir ))) { - while (i) RtlFreeHeap( GetProcessHeap(), 0, pointers[--i] ); + while (i) heap_free_aligned( pointers[--i] ); RtlFreeHeap( GetProcessHeap(), 0, pointers ); return STATUS_NO_MEMORY; } - memcpy( pointers[i], (void *)dir->StartAddressOfRawData, size ); - memset( (char *)pointers[i] + size, 0, dir->SizeOfZeroFill ); - - TRACE( "slot %u: %u/%lu bytes at %p\n", i, size, dir->SizeOfZeroFill, pointers[i] ); + TRACE( "slot %u: %Iu/%lu bytes at %p\n", i, dir->EndAddressOfRawData - dir->StartAddressOfRawData, dir->SizeOfZeroFill, pointers[i] ); } NtCurrentTeb()->ThreadLocalStoragePointer = pointers; #ifdef __x86_64__ /* macOS-specific hack */ @@ -3889,7 +3945,7 @@ void WINAPI LdrShutdownThread(void) if (NtCurrentTeb()->Instrumentation[0]) ((TEB *)NtCurrentTeb()->Instrumentation[0])->ThreadLocalStoragePointer = NULL; #endif - for (i = 0; i < tls_module_count; i++) RtlFreeHeap( GetProcessHeap(), 0, pointers[i] ); + for (i = 0; i < tls_module_count; i++) heap_free_aligned( pointers[i] ); RtlFreeHeap( GetProcessHeap(), 0, pointers ); } RtlProcessFlsData( NtCurrentTeb()->FlsSlots, 2 );
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=151211
Your paranoid android.
=== w7u_2qxl (32 bit report) ===
kernel32: loader.c:2297: Test failed: 0: wrong alignment 001845E8
=== w7u_adm (32 bit report) ===
kernel32: loader.c:2297: Test failed: 0: wrong alignment 003A45E8
=== w7u_el (32 bit report) ===
kernel32: loader.c:2297: Test failed: 0: wrong alignment 00314688
=== w7pro64 (64 bit report) ===
kernel32: loader.c:2297: Test failed: 0: wrong alignment 00000000003C05D0
=== debian11b (64 bit WoW report) ===
mf: mf.c:6536: Test failed: Test 3: Unexpected hr 0xc00d36b2.
user32: input.c:4306: Test succeeded inside todo block: button_down_hwnd_todo 1: got MSG_TEST_WIN hwnd 00000000036E00E8, msg WM_LBUTTONDOWN, wparam 0x1, lparam 0x320032
Alfred Agrell (@Alcaro) commented about dlls/kernel32/tests/loader.c:
ok( ptr->tls_index < 32, "wrong tls index %d\n", ptr->tls_index ); str = ((char **)NtCurrentTeb()->ThreadLocalStoragePointer)[ptr->tls_index]; ok( !strcmp( str, "hello world" ), "wrong tls data '%s' at %p\n", str, str );
/* should be aligned on 1024 bytes, 10 lower bits at 0 */
ok( !((DWORD_PTR)str & 0x3f), "wrong alignment %p\n", str );
but 10 bits is 0x3ff?
Alfred Agrell (@Alcaro) commented about dlls/ntdll/loader.c:
+static void *heap_alloc_aligned( unsigned size, unsigned align ) +{
- void *ptr;
- DWORD_PTR mask = ((DWORD_PTR)1 << align) - 1;
- if ((ptr = RtlAllocateHeap( GetProcessHeap(), 0, size )) && ((DWORD_PTR)ptr & mask))
- {
void *new_ptr;
/* if block is not aligned, alloc a greater one which we can realign */
if (!(new_ptr = RtlReAllocateHeap( GetProcessHeap(), 0, ptr, mask + size )))
RtlFreeHeap( GetProcessHeap(), 0, ptr );
ptr = (void *)(((DWORD_PTR)new_ptr + mask) & ~mask);
- }
- return ptr;
I'd vote make this function simply return both aligned and unaligned pointers, and have caller store them somewhere beside each other.
Did you see https://gitlab.winehq.org/wine/wine/-/merge_requests/6549 ? It seems like aligning this way as in this MR will break the test from there (https://gitlab.winehq.org/wine/wine/-/merge_requests/6549/diffs?commit_id=46... , HeapSize in the test won't work), so maybe there is more to it?
Or maybe I am wrong and ThreadLocalStoragePointer is not concerned by this patch, only the pointers in ThreadLocalStoragePointer[] array.
UPDATE: eh, no, my test also checks the array members.
On Tue Feb 4 08:52:48 2025 +0000, Alfred Agrell wrote:
but 10 bits is 0x3ff?
thx
On Tue Feb 4 08:54:16 2025 +0000, Alfred Agrell wrote:
I'd vote make this function simply return both aligned and unaligned pointers, and have caller store them somewhere beside each other.
we can't (as there's only one storage point, not two)... but storing the unaligned pointer just below the returned block shall do (see other discussion)
no, I missed that MR...
interesting indeed
1. I confirm that on Win10 when TLS directory has alignment requirement the returned ThreadLocalStoragePointer[] tls block is realigned from block returned by heap allocation 2. did some more tests and the actual (unaligned pointer) is stored just below the tls slot 3. successfully rerun tests on native from 6549 adapted with
``` diff --git a/dlls/kernel32/tests/module.c b/dlls/kernel32/tests/module.c index 3d6e195e23c..f04ac7f264f 100644 --- a/dlls/kernel32/tests/module.c +++ b/dlls/kernel32/tests/module.c @@ -1710,7 +1710,7 @@ static void test_tls_links(void) NTSTATUS status; ULONG i, count; HANDLE thread; - SIZE_T size; + SIZE_T size, size2; void **ptr;
ok(!!teb->ThreadLocalStoragePointer, "got NULL.\n"); @@ -1743,6 +1743,9 @@ static void test_tls_links(void) if (!ptr[i]) continue; size = HeapSize(GetProcessHeap(), 0, (void **)ptr[i] - 2); ok(size && size < 100000, "got %Iu.\n", size); + size2 = HeapSize(GetProcessHeap(), 0, ((void **)ptr[i])[-1]); + ok(size2 && size2 < 100000, "got %Iu.\n", size2); + ok(size == size2, "mismatch %Ix %Ix\n", size, size2); }
ptr = thread_teb->ThreadLocalStoragePointer; ```
so it looks like tests from 6549 are based on a peculiar alignment option of TLS dir and should be adapted accordingly
since the two MR clearly overlap (yet complement each other), how to you want to proceed? shall we merge them into a single one ?
or order them? (anyway I update 7251 to store properly unaligned pointer)
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=151224
Your paranoid android.
=== build (build log) ===
error: patch failed: dlls/kernel32/tests/module.c:1710 Task: Patch failed to apply
=== debian11 (build log) ===
error: patch failed: dlls/kernel32/tests/module.c:1710 Task: Patch failed to apply
=== debian11b (build log) ===
error: patch failed: dlls/kernel32/tests/module.c:1710 Task: Patch failed to apply