From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/heap.c | 64 +++++++++++++++++++++++++++++++++++++++-- dlls/ntdll/loader.c | 2 ++ dlls/ntdll/ntdll_misc.h | 1 + 3 files changed, 65 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index 4d6c018dddd..aaa914b28e5 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -227,6 +227,11 @@ C_ASSERT( BLOCK_SIZE_CATEGORY_COUNT <= 256 ); /* difference between block classes and all possible validation overhead must fit into block tail_size */ C_ASSERT( BLOCK_SIZE_MEDIUM_STEP + 3 * BLOCK_ALIGN <= FIELD_MAX( struct block, tail_size ) );
+/* next affinity value to hand out to a thread; the number of thread-local caches is limited, + * and additional threads will fight for the global category groups + */ +static LONG next_thread_affinity; + /* a category of heap blocks of a certain size */ struct category { @@ -237,6 +242,7 @@ struct category
/* list of groups with free blocks */ SLIST_HEADER groups; + struct group *affinity_group[32]; };
struct heap @@ -1672,7 +1678,11 @@ static NTSTATUS heap_allocate_block( struct heap *heap, ULONG flags, SIZE_T bloc struct DECLSPEC_ALIGN(BLOCK_ALIGN) group { struct block block; - SINGLE_LIST_ENTRY entry; + union + { + SINGLE_LIST_ENTRY entry; + struct list thread_entry; + }; /* one bit for each free block and the highest bit as unlinked flag */ LONG free_bits; }; @@ -1780,9 +1790,13 @@ static NTSTATUS group_release( struct heap *heap, ULONG flags, struct category * static struct group *heap_acquire_category_group( struct heap *heap, ULONG flags, SIZE_T block_size, struct category *category ) { + DWORD affinity = NtCurrentTeb()->HeapVirtualAffinity % ARRAY_SIZE(category->affinity_group); struct group *group; SLIST_ENTRY *entry;
+ if ((group = InterlockedExchangePointer( (void *)&category->affinity_group[affinity], NULL ))) + return group; + if (!(entry = RtlInterlockedPopEntrySList( &category->groups ))) group = group_allocate( heap, flags, block_size, category ); else @@ -1795,8 +1809,15 @@ static struct group *heap_acquire_category_group( struct heap *heap, ULONG flags static NTSTATUS heap_release_category_group( struct heap *heap, ULONG flags, struct category *category, struct group *group ) { + DWORD affinity = NtCurrentTeb()->HeapVirtualAffinity % ARRAY_SIZE(category->affinity_group); NTSTATUS status = STATUS_SUCCESS;
+ /* we cannot use InterlockedExchangePointer here because, contrary to our current group, + * the current affinity_group might be only partially free. + */ + if (!InterlockedCompareExchangePointer( (void *)&category->affinity_group[affinity], group, NULL )) + return STATUS_SUCCESS; + /* try re-using the block group instead of releasing it */ #ifdef _WIN64 if (category->groups.Header16.Depth <= 16) @@ -1815,6 +1836,14 @@ static struct block *find_free_block_lfh( struct heap *heap, ULONG flags, SIZE_T { struct block *block; struct group *group; + DWORD affinity; + + if (!(affinity = NtCurrentTeb()->HeapVirtualAffinity)) + { + affinity = InterlockedIncrement( &next_thread_affinity ); + NtCurrentTeb()->HeapVirtualAffinity = affinity; + } + affinity %= ARRAY_SIZE(category->affinity_group);
/* acquire a group, the thread will own it and no other thread can clear free bits. * some other thread might still set the free bits if they are freeing blocks. @@ -1831,7 +1860,8 @@ static struct block *find_free_block_lfh( struct heap *heap, ULONG flags, SIZE_T if (group->free_bits & GROUP_FLAG_FREE) return block;
/* otherwise there is still some free blocks, put the group back into the category */ - RtlInterlockedPushEntrySList( &category->groups, (SLIST_ENTRY *)&group->entry ); + if ((group = InterlockedExchangePointer( (void *)&category->affinity_group[affinity], group ))) + RtlInterlockedPushEntrySList( &category->groups, (SLIST_ENTRY *)&group->entry );
return block; } @@ -1899,6 +1929,36 @@ static NTSTATUS heap_free_block_lfh( struct heap *heap, ULONG flags, struct bloc return status; }
+static void heap_thread_detach_category_groups( struct heap *heap ) /* moves the group held in the calling thread's affinity slot of every category of this heap back to that category's groups list */ +{ + DWORD affinity = NtCurrentTeb()->HeapVirtualAffinity % ARRAY_SIZE(heap->categories->affinity_group); /* slot index derived from the current TEB's HeapVirtualAffinity */ + ULONG i; + + if (!heap->categories) return; /* no categories array on this heap, nothing to detach */ + + for (i = 0; i < BLOCK_SIZE_CATEGORY_COUNT; ++i) + { + struct category *category = heap->categories + i; + struct group *group; + if (!(group = InterlockedExchangePointer( (void *)&category->affinity_group[affinity], NULL ))) continue; /* empty the slot, skip the category when the slot held no group */ + RtlInterlockedPushEntrySList( &category->groups, (SLIST_ENTRY *)&group->entry ); /* hand the group back to the category's list of groups */ + } +} + +void heap_thread_detach(void) /* detaches the current thread's affinity groups from each heap linked from process_heap->entry, then from process_heap itself; called at the end of LdrShutdownThread */ +{ + struct heap *heap; + + RtlEnterCriticalSection( &process_heap->cs ); /* NOTE(review): presumably this lock guards the heap list being walked, confirm */ + + LIST_FOR_EACH_ENTRY( heap, &process_heap->entry, struct heap, entry ) + heap_thread_detach_category_groups( heap ); + + heap_thread_detach_category_groups( process_heap ); + + RtlLeaveCriticalSection( &process_heap->cs ); +} + /*********************************************************************** * RtlAllocateHeap (NTDLL.@) */ diff --git a/dlls/ntdll/loader.c b/dlls/ntdll/loader.c index 01a30742678..6c643575f8d 100644 --- a/dlls/ntdll/loader.c +++ b/dlls/ntdll/loader.c @@ -3737,6 +3737,8 @@ void WINAPI LdrShutdownThread(void) /* don't call DbgUiGetThreadDebugObject as some apps hook it and terminate if called */ if (NtCurrentTeb()->DbgSsReserved[1]) NtClose( NtCurrentTeb()->DbgSsReserved[1] ); RtlFreeThreadActivationContextStack(); + + heap_thread_detach(); }
diff --git a/dlls/ntdll/ntdll_misc.h b/dlls/ntdll/ntdll_misc.h index d1a7790991b..f6b77b79cde 100644 --- a/dlls/ntdll/ntdll_misc.h +++ b/dlls/ntdll/ntdll_misc.h @@ -127,5 +127,6 @@ static inline void ascii_to_unicode( WCHAR *dst, const char *src, size_t len )
/* FLS data */ extern TEB_FLS_DATA *fls_alloc_data(void) DECLSPEC_HIDDEN; +extern void heap_thread_detach(void) DECLSPEC_HIDDEN;
#endif