This MR enforces alignment of TLS slots as described in PE file.
I'm not 100% happy with the deallocation scheme, as it could be slow. If someone has a better idea, be welcome! Didn't open the option to introduce helpers in heap.c (but maybe that's the way to go).
-- v2: ntdll: Enforce the alignment of TLS directory entries. kernel32/tests: Add a test about TLS slot alignment.
From: Eric Pouech epouech@codeweavers.com
Signed-off-by: Eric Pouech epouech@codeweavers.com --- dlls/kernel32/tests/loader.c | 39 ++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+)
diff --git a/dlls/kernel32/tests/loader.c b/dlls/kernel32/tests/loader.c index 2c7cc784be4..b839a05325d 100644 --- a/dlls/kernel32/tests/loader.c +++ b/dlls/kernel32/tests/loader.c @@ -2106,6 +2106,34 @@ static void check_tls_index(HANDLE dll, BOOL tls_initialized) ok(found_dll, "Couldn't find dll %p in module list\n", dll); }
+static BOOL is_old_loader_struct(void) +{ + LDR_DATA_TABLE_ENTRY *mod, *mod2; + LDR_DDAG_NODE *ddag_node; + NTSTATUS status; + HMODULE hexe; + + /* Check for old LDR data strcuture. */ + hexe = GetModuleHandleW( NULL ); + ok( !!hexe, "Got NULL exe handle.\n" ); + status = LdrFindEntryForAddress( hexe, &mod ); + ok( !status, "got %#lx.\n", status ); + if (!(ddag_node = mod->DdagNode)) + { + win_skip( "DdagNode is NULL, skipping tests.\n" ); + return TRUE; + } + ok( !!ddag_node->Modules.Flink, "Got NULL module link.\n" ); + mod2 = CONTAINING_RECORD(ddag_node->Modules.Flink, LDR_DATA_TABLE_ENTRY, NodeModuleLink); + ok( mod2 == mod || broken( (void **)mod2 == (void **)mod - 1 ), "got %p, expected %p.\n", mod2, mod ); + if (mod2 != mod) + { + win_skip( "Old LDR_DATA_TABLE_ENTRY structure, skipping tests.\n" ); + return TRUE; + } + return FALSE; +} + static int tls_init_fn_output;
static DWORD WINAPI tls_thread_fn(void* tlsidx_v) @@ -2229,6 +2257,7 @@ static void test_import_resolution(void) ADD_RELOC( tls.EndAddressOfRawData ); data.tls.AddressOfIndex = nt.OptionalHeader.ImageBase + DATA_RVA( &data.tls_index ); ADD_RELOC( tls.AddressOfIndex ); + data.tls.Characteristics = IMAGE_SCN_ALIGN_1024BYTES; strcpy( data.tls_data, "hello world" ); data.tls_index = 9999; data.tls_index_hi = 9999; @@ -2292,6 +2321,16 @@ static void test_import_resolution(void) ok( ptr->tls_index < 32, "wrong tls index %d\n", ptr->tls_index ); str = ((char **)NtCurrentTeb()->ThreadLocalStoragePointer)[ptr->tls_index]; ok( !strcmp( str, "hello world" ), "wrong tls data '%s' at %p\n", str, str ); + if (!is_old_loader_struct()) + { + SIZE_T s; + /* should be aligned on 1024 bytes, 10 lower bits at 0 */ + todo_wine_if( (DWORD_PTR)str & 0x3ff ) + ok( !((DWORD_PTR)str & 0x3ff), "wrong alignment %p\n", str ); + s = HeapSize( GetProcessHeap(), 0, ((void **)str)[-1] ); + todo_wine + ok( s != (SIZE_T)-1, "Error %Ix\n", s); + } ok(ptr->tls_index_hi == 0, "TLS Index written as a short, high half: %d\n", ptr->tls_index_hi); check_tls_index(mod, ptr->tls_index != 9999); FreeLibrary( mod );
From: Eric Pouech epouech@codeweavers.com
Required for some Chrome integration.
Wine-Bugs: https://bugs.winehq.org/show_bug.cgi?id=57700
Signed-off-by: Eric Pouech epouech@codeweavers.com --- dlls/kernel32/tests/loader.c | 2 -- dlls/ntdll/loader.c | 69 +++++++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 18 deletions(-)
diff --git a/dlls/kernel32/tests/loader.c b/dlls/kernel32/tests/loader.c index b839a05325d..8ecc5614148 100644 --- a/dlls/kernel32/tests/loader.c +++ b/dlls/kernel32/tests/loader.c @@ -2325,10 +2325,8 @@ static void test_import_resolution(void) { SIZE_T s; /* should be aligned on 1024 bytes, 10 lower bits at 0 */ - todo_wine_if( (DWORD_PTR)str & 0x3ff ) ok( !((DWORD_PTR)str & 0x3ff), "wrong alignment %p\n", str ); s = HeapSize( GetProcessHeap(), 0, ((void **)str)[-1] ); - todo_wine ok( s != (SIZE_T)-1, "Error %Ix\n", s); } ok(ptr->tls_index_hi == 0, "TLS Index written as a short, high half: %d\n", ptr->tls_index_hi); diff --git a/dlls/ntdll/loader.c b/dlls/ntdll/loader.c index 0c25fe14133..bfbe2e63143 100644 --- a/dlls/ntdll/loader.c +++ b/dlls/ntdll/loader.c @@ -1270,6 +1270,51 @@ static BOOL is_dll_native_subsystem( LDR_DATA_TABLE_ENTRY *mod, const IMAGE_NT_H return TRUE; }
+static void *heap_alloc_aligned( unsigned size, unsigned align ) +{ + char *ptr; + DWORD_PTR mask = ((DWORD_PTR)1 << align) - 1; + + size += sizeof(void *) + mask; + if ((ptr = RtlAllocateHeap( GetProcessHeap(), 0, size ))) + { + void *unaligned_ptr = ptr; + + /* align block and store unaligned pointer just below the returned block */ + ptr = (char *)(((DWORD_PTR)ptr + sizeof(void *) + mask) & ~mask); + ((void **)ptr)[-1] = unaligned_ptr; + } + return ptr; +} + + +static BOOLEAN heap_free_aligned( void *ptr ) +{ + return ptr ? RtlFreeHeap( GetProcessHeap(), 0, ((void **)ptr)[-1] ) : TRUE; +} + + +/* Allocates the slot from a TLS directory (potentially realigning if necessary) + * - must be freed with heap_free_aligned + */ +static void *alloc_thread_tls_slot( const IMAGE_TLS_DIRECTORY *dir ) +{ + UINT size = dir->EndAddressOfRawData - dir->StartAddressOfRawData; + unsigned align; + void *ptr; + + align = (dir->Characteristics & IMAGE_SCN_ALIGN_MASK) >> 20; + if (!align) align = 5; /* default */ + if ((ptr = heap_alloc_aligned( size + dir->SizeOfZeroFill, align ))) + { + memcpy( ptr, (void *)dir->StartAddressOfRawData, size ); + memset( (char *)ptr + size, 0, dir->SizeOfZeroFill ); + } + + return ptr; +} + + /************************************************************************* * alloc_tls_slot * @@ -1348,15 +1393,12 @@ static BOOL alloc_tls_slot( LDR_DATA_TABLE_ENTRY *mod ) /* FIXME: can't free old block here, should be freed at thread exit */ }
- if (!(new_ptr = RtlAllocateHeap( GetProcessHeap(), 0, size + dir->SizeOfZeroFill ))) return -1; - memcpy( new_ptr, (void *)dir->StartAddressOfRawData, size ); - memset( (char *)new_ptr + size, 0, dir->SizeOfZeroFill ); + if (!(new_ptr = alloc_thread_tls_slot( dir ))) return -1;
TRACE( "thread %04lx slot %lu: %lu/%lu bytes at %p\n", HandleToULong(teb->ClientId.UniqueThread), i, size, dir->SizeOfZeroFill, new_ptr );
- RtlFreeHeap( GetProcessHeap(), 0, - InterlockedExchangePointer( (void **)teb->ThreadLocalStoragePointer + i, new_ptr )); + heap_free_aligned( InterlockedExchangePointer( (void **)teb->ThreadLocalStoragePointer + i, new_ptr )); } if (thread) NtClose( thread );
@@ -1569,7 +1611,7 @@ static WINE_MODREF *alloc_module( HMODULE hModule, const UNICODE_STRING *nt_name static NTSTATUS alloc_thread_tls(void) { void **pointers; - UINT i, size; + UINT i;
if (!(pointers = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, tls_module_count * sizeof(*pointers) ))) @@ -1579,20 +1621,15 @@ static NTSTATUS alloc_thread_tls(void) { const IMAGE_TLS_DIRECTORY *dir = &tls_dirs[i];
- if (!dir) continue; - size = dir->EndAddressOfRawData - dir->StartAddressOfRawData; - if (!size && !dir->SizeOfZeroFill) continue; + if (!dir || (dir->EndAddressOfRawData == dir->StartAddressOfRawData && !dir->SizeOfZeroFill)) continue;
- if (!(pointers[i] = RtlAllocateHeap( GetProcessHeap(), 0, size + dir->SizeOfZeroFill ))) + if (!(pointers[i] = alloc_thread_tls_slot( dir ))) { - while (i) RtlFreeHeap( GetProcessHeap(), 0, pointers[--i] ); + while (i) heap_free_aligned( pointers[--i] ); RtlFreeHeap( GetProcessHeap(), 0, pointers ); return STATUS_NO_MEMORY; } - memcpy( pointers[i], (void *)dir->StartAddressOfRawData, size ); - memset( (char *)pointers[i] + size, 0, dir->SizeOfZeroFill ); - - TRACE( "slot %u: %u/%lu bytes at %p\n", i, size, dir->SizeOfZeroFill, pointers[i] ); + TRACE( "slot %u: %u/%lu bytes at %p\n", i, (UINT)(dir->EndAddressOfRawData - dir->StartAddressOfRawData), dir->SizeOfZeroFill, pointers[i] ); } NtCurrentTeb()->ThreadLocalStoragePointer = pointers; #ifdef __x86_64__ /* macOS-specific hack */ @@ -3889,7 +3926,7 @@ void WINAPI LdrShutdownThread(void) if (NtCurrentTeb()->Instrumentation[0]) ((TEB *)NtCurrentTeb()->Instrumentation[0])->ThreadLocalStoragePointer = NULL; #endif - for (i = 0; i < tls_module_count; i++) RtlFreeHeap( GetProcessHeap(), 0, pointers[i] ); + for (i = 0; i < tls_module_count; i++) heap_free_aligned( pointers[i] ); RtlFreeHeap( GetProcessHeap(), 0, pointers ); } RtlProcessFlsData( NtCurrentTeb()->FlsSlots, 2 );
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=151228
Your paranoid android.
=== debian11b (64 bit WoW report) ===
user32: input.c:4306: Test succeeded inside todo block: button_down_hwnd_todo 1: got MSG_TEST_WIN hwnd 0000000000D900F0, msg WM_LBUTTONDOWN, wparam 0x1, lparam 0x320032
pushed V2:
* fix clang compilation * fix broken test * now storing unaligned pointer in allocated block so that freeing is just an indirection away
since the two MR clearly overlap (yet complement each other), how to you want to proceed? shall we merge them into a single one ?
I think that MR has a bigger part which is unrelated here, probably merging won't increase the chances for this to get in. You are already introducing a special memory allocation handling somewhat similar to the last patch in that MR. I'd suggest that maybe you can incorporate tests from there: HeapSize at correct offset (so that matches Windows and your implementation) and element count (if applicable, maybe there is no such for tls array entries you are changing)?
On Tue Feb 4 17:34:19 2025 +0000, Paul Gofman wrote:
since the two MR clearly overlap (yet complement each other), how to
you want to proceed? shall we merge them into a single one ? I think that MR has a bigger part which is unrelated here, probably merging won't increase the chances for this to get in. You are already introducing a special memory allocation handling somewhat similar to the last patch in that MR. I'd suggest that maybe you can incorporate tests from there: HeapSize at correct offset (so that matches Windows and your implementation) and element count (if applicable, maybe there is no such for tls array entries you are changing)?
ok will do... (need to test for the count in the vector... I'd be tempted to say yes as it really looks like a vector layout with array of elements at ptr, and control structure just below ptr... maybe ptr[-1] is size of element - wild guess here- or unaligned ptr if needed)