Not completely sure if it's worth having for 8.0, but opening this to share the target I'm trying to reach.
-- v12: ntdll: Add a heap thread affinity and per-affinity bin group cache. ntdll: Use atomics and lock-free list for bin groups. ntdll: Implement Low Fragmentation Heap frontend. ntdll: Split heap_resize_block into heap_resize_(block|large) helpers. ntdll: Count allocations and automatically enable LFH. ntdll: Increase heap block tail_size capacity to 16 bits. ntdll: Implement HeapCompatibilityInformation. ntdll: Fix HeapWalk with empty uncommitted consecutive subheaps.
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index 9403c83e92c..a2410957a44 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -653,7 +653,7 @@ static SUBHEAP *find_subheap( const struct heap *heap, const struct block *block if (!check_subheap( subheap, heap )) return NULL; if (contains( first_block( subheap ), blocks_size, block, sizeof(*block) )) return subheap; /* outside of blocks region, possible corruption or heap_walk */ - if (contains( subheap_base( subheap ), subheap_size( subheap ), block, 0 )) return heap_walk ? subheap : NULL; + if (contains( subheap_base( subheap ), subheap_size( subheap ), block, 1 )) return heap_walk ? subheap : NULL; }
return NULL;
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/kernel32/tests/heap.c | 34 +++++++++++++++------- dlls/ntdll/heap.c | 58 ++++++++++++++++++++++++++++++++------ 2 files changed, 73 insertions(+), 19 deletions(-)
diff --git a/dlls/kernel32/tests/heap.c b/dlls/kernel32/tests/heap.c index a81cb1b7a15..307b11771b3 100644 --- a/dlls/kernel32/tests/heap.c +++ b/dlls/kernel32/tests/heap.c @@ -827,11 +827,8 @@ static void test_HeapCreate(void) size = 0; SetLastError( 0xdeadbeef ); ret = pHeapQueryInformation( 0, HeapCompatibilityInformation, &compat_info, sizeof(compat_info), &size ); - todo_wine ok( !ret, "HeapQueryInformation succeeded\n" ); - todo_wine ok( GetLastError() == ERROR_NOACCESS, "got error %lu\n", GetLastError() ); - todo_wine ok( size == 0, "got size %Iu\n", size );
size = 0; @@ -871,7 +868,6 @@ static void test_HeapCreate(void) ok( ret, "HeapSetInformation failed, error %lu\n", GetLastError() ); ret = pHeapQueryInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info), &size ); ok( ret, "HeapQueryInformation failed, error %lu\n", GetLastError() ); - todo_wine ok( compat_info == 2, "got HeapCompatibilityInformation %lu\n", compat_info );
/* cannot be undone */ @@ -879,25 +875,44 @@ static void test_HeapCreate(void) compat_info = 0; SetLastError( 0xdeadbeef ); ret = pHeapSetInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info) ); - todo_wine ok( !ret, "HeapSetInformation succeeded\n" ); - todo_wine ok( GetLastError() == ERROR_GEN_FAILURE, "got error %lu\n", GetLastError() ); compat_info = 1; SetLastError( 0xdeadbeef ); ret = pHeapSetInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info) ); - todo_wine ok( !ret, "HeapSetInformation succeeded\n" ); - todo_wine ok( GetLastError() == ERROR_GEN_FAILURE, "got error %lu\n", GetLastError() ); ret = pHeapQueryInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info), &size ); ok( ret, "HeapQueryInformation failed, error %lu\n", GetLastError() ); - todo_wine ok( compat_info == 2, "got HeapCompatibilityInformation %lu\n", compat_info );
ret = HeapDestroy( heap ); ok( ret, "HeapDestroy failed, error %lu\n", GetLastError() );
+ + /* cannot set LFH with HEAP_NO_SERIALIZE */ + + heap = HeapCreate( HEAP_NO_SERIALIZE, 0, 0 ); + ok( !!heap, "HeapCreate failed, error %lu\n", GetLastError() ); + ok( !((ULONG_PTR)heap & 0xffff), "wrong heap alignment\n" ); + + ret = pHeapQueryInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info), &size ); + ok( ret, "HeapQueryInformation failed, error %lu\n", GetLastError() ); + ok( compat_info == 0, "got HeapCompatibilityInformation %lu\n", compat_info ); + + compat_info = 2; + SetLastError( 0xdeadbeef ); + ret = pHeapSetInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info) ); + ok( !ret, "HeapSetInformation succeeded\n" ); + ok( GetLastError() == ERROR_INVALID_PARAMETER, "got error %lu\n", GetLastError() ); + ret = pHeapQueryInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info), &size ); + ok( ret, "HeapQueryInformation failed, error %lu\n", GetLastError() ); + ok( compat_info == 0, "got HeapCompatibilityInformation %lu\n", compat_info ); + + ret = HeapDestroy( heap ); + ok( ret, "HeapDestroy failed, error %lu\n", GetLastError() ); + + /* some allocation pattern automatically enables LFH */
heap = HeapCreate( 0, 0, 0 ); @@ -931,7 +946,6 @@ static void test_HeapCreate(void) ok( ret, "HeapSetInformation failed, error %lu\n", GetLastError() ); ret = pHeapQueryInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info), &size ); ok( ret, "HeapQueryInformation failed, error %lu\n", GetLastError() ); - todo_wine ok( compat_info == 2, "got HeapCompatibilityInformation %lu\n", compat_info );
for (i = 0; i < 0x11; i++) ptrs[i] = pHeapAlloc( heap, 0, 24 + 2 * sizeof(void *) ); diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index a2410957a44..32808754ddb 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -39,6 +39,13 @@
WINE_DEFAULT_DEBUG_CHANNEL(heap);
+/* HeapCompatibilityInformation values */ + +#define HEAP_STD 0 +#define HEAP_LAL 1 +#define HEAP_LFH 2 + + /* undocumented RtlWalkHeap structure */
struct rtl_heap_entry @@ -194,6 +201,7 @@ struct heap DWORD force_flags; /* 0044/0074 */ /* end of the Windows 10 compatible struct layout */
+ LONG compat_info; /* HeapCompatibilityInformation / heap frontend type */ struct list entry; /* Entry in process heap list */ struct list subheap_list; /* Sub-heap list */ struct list large_list; /* Large blocks list */ @@ -1378,6 +1386,7 @@ HANDLE WINAPI RtlCreateHeap( ULONG flags, void *addr, SIZE_T total_size, SIZE_T heap->ffeeffee = 0xffeeffee; heap->auto_flags = (flags & HEAP_GROWABLE); heap->flags = (flags & ~HEAP_SHARED); + heap->compat_info = HEAP_STD; heap->magic = HEAP_MAGIC; heap->grow_size = max( HEAP_DEF_SIZE, total_size ); heap->min_size = commit_size; @@ -2039,21 +2048,24 @@ ULONG WINAPI RtlGetProcessHeaps( ULONG count, HANDLE *heaps ) * RtlQueryHeapInformation (NTDLL.@) */ NTSTATUS WINAPI RtlQueryHeapInformation( HANDLE handle, HEAP_INFORMATION_CLASS info_class, - void *info, SIZE_T size_in, PSIZE_T size_out ) + void *info, SIZE_T size_in, SIZE_T *size_out ) { + struct heap *heap; + ULONG flags; + + TRACE( "handle %p, info_class %u, info %p, size_in %Iu, size_out %p.\n", handle, info_class, info, size_in, size_out ); + switch (info_class) { case HeapCompatibilityInformation: + if (!(heap = unsafe_heap_from_handle( handle, 0, &flags ))) return STATUS_ACCESS_VIOLATION; if (size_out) *size_out = sizeof(ULONG); - - if (size_in < sizeof(ULONG)) - return STATUS_BUFFER_TOO_SMALL; - - *(ULONG *)info = 0; /* standard heap */ + if (size_in < sizeof(ULONG)) return STATUS_BUFFER_TOO_SMALL; + *(ULONG *)info = ReadNoFence( &heap->compat_info ); return STATUS_SUCCESS;
default: - FIXME("Unknown heap information class %u\n", info_class); + FIXME( "HEAP_INFORMATION_CLASS %u not implemented!\n", info_class ); return STATUS_INVALID_INFO_CLASS; } } @@ -2063,8 +2075,36 @@ NTSTATUS WINAPI RtlQueryHeapInformation( HANDLE handle, HEAP_INFORMATION_CLASS i */ NTSTATUS WINAPI RtlSetHeapInformation( HANDLE handle, HEAP_INFORMATION_CLASS info_class, void *info, SIZE_T size ) { - FIXME( "handle %p, info_class %d, info %p, size %Id stub!\n", handle, info_class, info, size ); - return STATUS_SUCCESS; + struct heap *heap; + ULONG flags; + + TRACE( "handle %p, info_class %u, info %p, size %Iu.\n", handle, info_class, info, size ); + + switch (info_class) + { + case HeapCompatibilityInformation: + { + ULONG compat_info; + + if (size < sizeof(ULONG)) return STATUS_BUFFER_TOO_SMALL; + if (!(heap = unsafe_heap_from_handle( handle, 0, &flags ))) return STATUS_INVALID_HANDLE; + if (heap->flags & HEAP_NO_SERIALIZE) return STATUS_INVALID_PARAMETER; + + compat_info = *(ULONG *)info; + if (compat_info != HEAP_STD && compat_info != HEAP_LFH) + { + FIXME( "HeapCompatibilityInformation %lu not implemented!\n", compat_info ); + return STATUS_UNSUCCESSFUL; + } + if (InterlockedCompareExchange( &heap->compat_info, compat_info, HEAP_STD ) != HEAP_STD) + return STATUS_UNSUCCESSFUL; + return STATUS_SUCCESS; + } + + default: + FIXME( "HEAP_INFORMATION_CLASS %u not implemented!\n", info_class ); + return STATUS_SUCCESS; + } }
/***********************************************************************
From: Rémi Bernon rbernon@codeweavers.com
We need this for larger LFH block bin steps. Use smaller block types to get more space, they are somewhat redundant with the block flags. --- dlls/ntdll/heap.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index 32808754ddb..f17064972a1 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -86,11 +86,14 @@ struct rtl_heap_entry
struct block { - WORD block_size; /* block size in multiple of BLOCK_ALIGN */ + /* block size in multiple of BLOCK_ALIGN */ + WORD block_size; + /* unused size (used block) / high size bits (free block) */ + WORD tail_size; + /* offset to region base */ + WORD base_offset; + BYTE block_type; BYTE block_flags; - BYTE tail_size; /* unused size (used block) / high size bits (free block) */ - WORD base_offset; /* offset to the region base in multiple of REGION_ALIGN */ - WORD block_type; };
C_ASSERT( sizeof(struct block) == 8 ); @@ -131,10 +134,10 @@ typedef struct C_ASSERT( sizeof(ARENA_LARGE) == offsetof(ARENA_LARGE, block) + sizeof(struct block) ); C_ASSERT( sizeof(ARENA_LARGE) == 4 * BLOCK_ALIGN );
-#define BLOCK_TYPE_USED 0x5355 -#define BLOCK_TYPE_DEAD 0xdead -#define BLOCK_TYPE_FREE 0x5246 -#define BLOCK_TYPE_LARGE 0x614c +#define BLOCK_TYPE_USED 'u' +#define BLOCK_TYPE_DEAD 'D' +#define BLOCK_TYPE_FREE 'F' +#define BLOCK_TYPE_LARGE 'L'
#define BLOCK_FILL_USED 0x55 #define BLOCK_FILL_TAIL 0xab @@ -157,8 +160,9 @@ C_ASSERT( sizeof(struct entry) <= HEAP_MIN_BLOCK_SIZE ); #define HEAP_MAX_BLOCK_REGION_SIZE (FIELD_MAX( struct block, base_offset ) * REGION_ALIGN)
C_ASSERT( HEAP_MAX_USED_BLOCK_SIZE == 512 * 1024 * (sizeof(void *) / 4) - BLOCK_ALIGN ); -C_ASSERT( HEAP_MAX_FREE_BLOCK_SIZE == 128 * 1024 * 1024 * (sizeof(void *) / 4) - BLOCK_ALIGN ); -C_ASSERT( HEAP_MAX_BLOCK_REGION_SIZE >= HEAP_MAX_FREE_BLOCK_SIZE ); +C_ASSERT( HEAP_MAX_FREE_BLOCK_SIZE >= 128 * 1024 * 1024 * (sizeof(void *) / 4) - BLOCK_ALIGN ); +C_ASSERT( HEAP_MAX_BLOCK_REGION_SIZE >= 128 * 1024 * 1024 * (sizeof(void *) / 4) - BLOCK_ALIGN ); +C_ASSERT( HEAP_MAX_FREE_BLOCK_SIZE >= HEAP_MAX_BLOCK_REGION_SIZE );
/* minimum size to start allocating large blocks */ #define HEAP_MIN_LARGE_BLOCK_SIZE (HEAP_MAX_USED_BLOCK_SIZE - 0x1000)
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/kernel32/tests/heap.c | 2 - dlls/ntdll/heap.c | 138 ++++++++++++++++++++++++++++++++++++- 2 files changed, 136 insertions(+), 4 deletions(-)
diff --git a/dlls/kernel32/tests/heap.c b/dlls/kernel32/tests/heap.c index 307b11771b3..cf4b4fa7058 100644 --- a/dlls/kernel32/tests/heap.c +++ b/dlls/kernel32/tests/heap.c @@ -928,7 +928,6 @@ static void test_HeapCreate(void)
ret = pHeapQueryInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info), &size ); ok( ret, "HeapQueryInformation failed, error %lu\n", GetLastError() ); - todo_wine ok( compat_info == 2, "got HeapCompatibilityInformation %lu\n", compat_info );
ret = HeapDestroy( heap ); @@ -1219,7 +1218,6 @@ static void test_HeapCreate(void)
ret = pHeapQueryInformation( heap, HeapCompatibilityInformation, &compat_info, sizeof(compat_info), &size ); ok( ret, "HeapQueryInformation failed, error %lu\n", GetLastError() ); - todo_wine ok( compat_info == 2, "got HeapCompatibilityInformation %lu\n", compat_info );
/* locking is serialized */ diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index f17064972a1..4b7e379da74 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -90,7 +90,7 @@ struct block WORD block_size; /* unused size (used block) / high size bits (free block) */ WORD tail_size; - /* offset to region base */ + /* offset to region base / first group block (LFH block) */ WORD base_offset; BYTE block_type; BYTE block_flags; @@ -193,6 +193,76 @@ typedef struct DECLSPEC_ALIGN(BLOCK_ALIGN) tagSUBHEAP C_ASSERT( sizeof(SUBHEAP) == offsetof(SUBHEAP, block) + sizeof(struct block) ); C_ASSERT( sizeof(SUBHEAP) == 4 * BLOCK_ALIGN );
+ +/* LFH block size bins */ + +#define BIN_SIZE_MIN_0 0 +#define BIN_SIZE_MIN_1 0x100 +#define BIN_SIZE_MIN_2 0x200 +#define BIN_SIZE_MIN_3 0x400 +#define BIN_SIZE_MIN_4 0x800 +#define BIN_SIZE_MIN_5 0x1000 +#define BIN_SIZE_MIN_6 0x2000 +#define BIN_SIZE_MIN_7 0x4000 +#define BIN_SIZE_MAX 0x8000 + +#define BIN_SIZE_STEP_0 (16) +#define BIN_SIZE_STEP_1 (BIN_SIZE_MIN_1 >> 4) +#define BIN_SIZE_STEP_2 (BIN_SIZE_MIN_2 >> 4) +#define BIN_SIZE_STEP_3 (BIN_SIZE_MIN_3 >> 4) +#define BIN_SIZE_STEP_4 (BIN_SIZE_MIN_4 >> 4) +#define BIN_SIZE_STEP_5 (BIN_SIZE_MIN_5 >> 4) +#define BIN_SIZE_STEP_6 (BIN_SIZE_MIN_6 >> 4) +#define BIN_SIZE_STEP_7 (BIN_SIZE_MIN_7 >> 4) + +#define BLOCK_BIN_SIZE_N( n, bin ) (BIN_SIZE_MIN_##n + (bin + 1) * BIN_SIZE_STEP_##n) +#define BLOCK_SIZE_BIN_N( n, size ) (((size) - 1 - BIN_SIZE_MIN_##n) / BIN_SIZE_STEP_##n) + +#define BLOCK_BIN_SIZE( bin ) ((bin) >= 0x80 ? ~(SIZE_T)0 : \ + (bin) >= 0x70 ? BLOCK_BIN_SIZE_N( 7, bin - 0x70 ) : \ + (bin) >= 0x60 ? BLOCK_BIN_SIZE_N( 6, bin - 0x60 ) : \ + (bin) >= 0x50 ? BLOCK_BIN_SIZE_N( 5, bin - 0x50 ) : \ + (bin) >= 0x40 ? BLOCK_BIN_SIZE_N( 4, bin - 0x40 ) : \ + (bin) >= 0x30 ? BLOCK_BIN_SIZE_N( 3, bin - 0x30 ) : \ + (bin) >= 0x20 ? BLOCK_BIN_SIZE_N( 2, bin - 0x20 ) : \ + (bin) >= 0x10 ? BLOCK_BIN_SIZE_N( 1, bin - 0x10 ) : \ + BLOCK_BIN_SIZE_N( 0, bin )) + +#define BLOCK_SIZE_BIN( size ) ((size) > BIN_SIZE_MAX ? 0x80 : \ + (size) > BIN_SIZE_MIN_7 ? 0x70 + BLOCK_SIZE_BIN_N( 7, size ) : \ + (size) > BIN_SIZE_MIN_6 ? 0x60 + BLOCK_SIZE_BIN_N( 6, size ) : \ + (size) > BIN_SIZE_MIN_5 ? 0x50 + BLOCK_SIZE_BIN_N( 5, size ) : \ + (size) > BIN_SIZE_MIN_4 ? 0x40 + BLOCK_SIZE_BIN_N( 4, size ) : \ + (size) > BIN_SIZE_MIN_3 ? 0x30 + BLOCK_SIZE_BIN_N( 3, size ) : \ + (size) > BIN_SIZE_MIN_2 ? 0x20 + BLOCK_SIZE_BIN_N( 2, size ) : \ + (size) > BIN_SIZE_MIN_1 ? 0x10 + BLOCK_SIZE_BIN_N( 1, size ) : \ + (size) <= BIN_SIZE_MIN_0 ? 0 : BLOCK_SIZE_BIN_N( 0, size )) + +#define BLOCK_SIZE_BIN_COUNT (BLOCK_SIZE_BIN( BIN_SIZE_MAX + 1 ) + 1) + +/* macros sanity checks */ +C_ASSERT( BLOCK_SIZE_BIN( 0 ) == 0 ); +C_ASSERT( BLOCK_SIZE_BIN( 0x10 ) == 0 ); +C_ASSERT( BLOCK_BIN_SIZE( 0 ) == BIN_SIZE_MIN_0 + 1 * BIN_SIZE_STEP_0 ); +C_ASSERT( BLOCK_SIZE_BIN( 0x11 ) == 1 ); +C_ASSERT( BLOCK_BIN_SIZE( 1 ) == BIN_SIZE_MIN_0 + 2 * BIN_SIZE_STEP_0 ); +C_ASSERT( BLOCK_SIZE_BIN( BIN_SIZE_MAX ) == 0x7f ); +C_ASSERT( BLOCK_BIN_SIZE( 0x7f ) == BIN_SIZE_MAX ); +C_ASSERT( BLOCK_SIZE_BIN( BIN_SIZE_MAX + 1 ) == 0x80 ); +C_ASSERT( BLOCK_BIN_SIZE( 0x80 ) == ~(SIZE_T)0 ); + +/* difference between block classes and all possible validation overhead must fit into block tail_size */ +C_ASSERT( BIN_SIZE_STEP_7 + 3 * BLOCK_ALIGN <= FIELD_MAX( struct block, tail_size ) ); + +/* a bin, tracking heap blocks of a certain size */ +struct bin +{ + /* counters for LFH activation */ + LONG count_alloc; + LONG count_freed; + LONG enabled; +}; + struct heap { /* win32/win64 */ DWORD_PTR unknown1[2]; /* 0000/0000 */ @@ -216,6 +286,7 @@ struct heap struct block **pending_free; /* Ring buffer for pending free requests */ RTL_CRITICAL_SECTION cs; struct entry free_lists[HEAP_NB_FREE_LISTS]; + struct bin *bins; SUBHEAP subheap; };
@@ -548,6 +619,16 @@ static void heap_dump( const struct heap *heap ) TRACE( "heap: %p\n", heap ); TRACE( " next %p\n", LIST_ENTRY( heap->entry.next, struct heap, entry ) );
+ TRACE( " bins:\n" ); + for (i = 0; heap->bins && i < BLOCK_SIZE_BIN_COUNT; i++) + { + const struct bin *bin = heap->bins + i; + ULONG alloc = ReadNoFence( &bin->count_alloc ), freed = ReadNoFence( &bin->count_freed ); + if (!alloc && !freed) continue; + TRACE( " %3u: size %#4Ix, alloc %ld, freed %ld, enabled %lu\n", i, BLOCK_BIN_SIZE( i ), + alloc, freed, ReadNoFence( &bin->enabled ) ); + } + TRACE( " free_lists: %p\n", heap->free_lists ); for (i = 0; i < HEAP_NB_FREE_LISTS; i++) TRACE( " %p: size %#8Ix, prev %p, next %p\n", heap->free_lists + i, get_free_list_block_size( i ), @@ -1434,6 +1515,13 @@ HANDLE WINAPI RtlCreateHeap( ULONG flags, void *addr, SIZE_T total_size, SIZE_T
heap_set_debug_flags( heap );
+ if (heap->flags & HEAP_GROWABLE) + { + SIZE_T size = sizeof(struct bin) * BLOCK_SIZE_BIN_COUNT; + NtAllocateVirtualMemory( NtCurrentProcess(), (void *)&heap->bins, + 0, &size, MEM_COMMIT, PAGE_READWRITE ); + } + /* link it into the per-process heap list */ if (process_heap) { @@ -1519,6 +1607,11 @@ HANDLE WINAPI RtlDestroyHeap( HANDLE handle ) NtFreeVirtualMemory( NtCurrentProcess(), &addr, &size, MEM_RELEASE ); } valgrind_notify_free_all( &heap->subheap, heap ); + if ((addr = heap->bins)) + { + size = 0; + NtFreeVirtualMemory( NtCurrentProcess(), &addr, &size, MEM_RELEASE ); + } size = 0; addr = heap; NtFreeVirtualMemory( NtCurrentProcess(), &addr, &size, MEM_RELEASE ); @@ -1576,6 +1669,27 @@ static NTSTATUS heap_allocate_block( struct heap *heap, ULONG flags, SIZE_T bloc return STATUS_SUCCESS; }
+static void bin_try_enable( struct heap *heap, struct bin *bin ) +{ + ULONG alloc = ReadNoFence( &bin->count_alloc ), freed = ReadNoFence( &bin->count_freed ); + SIZE_T block_size = BLOCK_BIN_SIZE( bin - heap->bins ); + BOOL enable = FALSE; + + if (bin == heap->bins && alloc > 0x10) enable = TRUE; + else if (bin - heap->bins < 0x30 && alloc > 0x800) enable = TRUE; + else if (bin - heap->bins < 0x30 && alloc - freed > 0x10) enable = TRUE; + else if (alloc - freed > 0x400000 / block_size) enable = TRUE; + if (!enable) return; + + if (ReadNoFence( &heap->compat_info ) != HEAP_LFH) + { + ULONG info = HEAP_LFH; + RtlSetHeapInformation( heap, HeapCompatibilityInformation, &info, sizeof(info) ); + } + + WriteNoFence( &bin->enabled, TRUE ); +} + /*********************************************************************** * RtlAllocateHeap (NTDLL.@) */ @@ -1598,6 +1712,13 @@ void *WINAPI DECLSPEC_HOTPATCH RtlAllocateHeap( HANDLE handle, ULONG flags, SIZE heap_lock( heap, heap_flags ); status = heap_allocate_block( heap, heap_flags, block_size, size, &ptr ); heap_unlock( heap, heap_flags ); + + if (!status && heap->bins) + { + SIZE_T bin = BLOCK_SIZE_BIN( block_get_size( (struct block *)ptr - 1 ) ); + InterlockedIncrement( &heap->bins[bin].count_alloc ); + if (!ReadNoFence( &heap->bins[bin].enabled )) bin_try_enable( heap, &heap->bins[bin] ); + } }
if (!status) valgrind_notify_alloc( ptr, size, flags & HEAP_ZERO_MEMORY ); @@ -1632,9 +1753,13 @@ BOOLEAN WINAPI DECLSPEC_HOTPATCH RtlFreeHeap( HANDLE handle, ULONG flags, void * status = STATUS_SUCCESS; else { + SIZE_T block_size = block_get_size( block ), bin = BLOCK_SIZE_BIN( block_size ); + heap_lock( heap, heap_flags ); status = heap_free_block( heap, heap_flags, block ); heap_unlock( heap, heap_flags ); + + if (!status && heap->bins) InterlockedIncrement( &heap->bins[bin].count_freed ); }
TRACE( "handle %p, flags %#lx, ptr %p, return %u, status %#lx.\n", handle, flags, ptr, !status, status ); @@ -1646,7 +1771,7 @@ static NTSTATUS heap_resize_block( struct heap *heap, ULONG flags, struct block SIZE_T size, SIZE_T *old_size, void **ret ) { SUBHEAP *subheap = block_get_subheap( heap, block ); - SIZE_T old_block_size; + SIZE_T old_bin, old_block_size; struct entry *entry; struct block *next;
@@ -1673,6 +1798,7 @@ static NTSTATUS heap_resize_block( struct heap *heap, ULONG flags, struct block
old_block_size = block_get_size( block ); *old_size = old_block_size - block_get_overhead( block ); + old_bin = BLOCK_SIZE_BIN( old_block_size );
if (block_size >= HEAP_MIN_LARGE_BLOCK_SIZE) return STATUS_NO_MEMORY; /* growing small block to large block */
@@ -1712,6 +1838,14 @@ static NTSTATUS heap_resize_block( struct heap *heap, ULONG flags, struct block
heap_unlock( heap, flags );
+ if (heap->bins) + { + SIZE_T new_bin = BLOCK_SIZE_BIN( block_size ); + InterlockedIncrement( &heap->bins[old_bin].count_freed ); + InterlockedIncrement( &heap->bins[new_bin].count_alloc ); + if (!ReadNoFence( &heap->bins[new_bin].enabled )) bin_try_enable( heap, &heap->bins[new_bin] ); + } + *ret = block + 1; return STATUS_SUCCESS; }
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/heap.c | 92 ++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 41 deletions(-)
diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index 4b7e379da74..197217c90a7 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -1767,57 +1767,47 @@ BOOLEAN WINAPI DECLSPEC_HOTPATCH RtlFreeHeap( HANDLE handle, ULONG flags, void * return !status; }
-static NTSTATUS heap_resize_block( struct heap *heap, ULONG flags, struct block *block, SIZE_T block_size, +static NTSTATUS heap_resize_large( struct heap *heap, ULONG flags, struct block *block, SIZE_T block_size, SIZE_T size, SIZE_T *old_size, void **ret ) { - SUBHEAP *subheap = block_get_subheap( heap, block ); - SIZE_T old_bin, old_block_size; - struct entry *entry; - struct block *next; + ARENA_LARGE *large = CONTAINING_RECORD( block, ARENA_LARGE, block ); + SIZE_T old_block_size = large->block_size; + *old_size = large->data_size;
- if (block_get_flags( block ) & BLOCK_FLAG_LARGE) - { - ARENA_LARGE *large = CONTAINING_RECORD( block, ARENA_LARGE, block ); - old_block_size = large->block_size; - *old_size = large->data_size; - - if (block_size < HEAP_MIN_LARGE_BLOCK_SIZE / 4) return STATUS_NO_MEMORY; /* shrinking large block to small block */ - if (old_block_size < block_size) return STATUS_NO_MEMORY; - - /* FIXME: we could remap zero-pages instead */ - valgrind_notify_resize( block + 1, *old_size, size ); - initialize_block( block, *old_size, size, flags ); + if (block_size < HEAP_MIN_LARGE_BLOCK_SIZE / 4) return STATUS_NO_MEMORY; /* shrinking large block to small block */ + if (old_block_size < block_size) return STATUS_NO_MEMORY;
- large->data_size = size; - valgrind_make_noaccess( (char *)block + sizeof(*block) + large->data_size, - old_block_size - sizeof(*block) - large->data_size ); - - *ret = block + 1; - return STATUS_SUCCESS; - } + /* FIXME: we could remap zero-pages instead */ + valgrind_notify_resize( block + 1, *old_size, size ); + initialize_block( block, *old_size, size, flags );
- old_block_size = block_get_size( block ); - *old_size = old_block_size - block_get_overhead( block ); - old_bin = BLOCK_SIZE_BIN( old_block_size ); + large->data_size = size; + valgrind_make_noaccess( (char *)block + sizeof(*block) + large->data_size, + old_block_size - sizeof(*block) - large->data_size );
- if (block_size >= HEAP_MIN_LARGE_BLOCK_SIZE) return STATUS_NO_MEMORY; /* growing small block to large block */ + *ret = block + 1; + return STATUS_SUCCESS; +}
- heap_lock( heap, flags ); +static NTSTATUS heap_resize_block( struct heap *heap, ULONG flags, struct block *block, SIZE_T block_size, + SIZE_T size, SIZE_T old_block_size, SIZE_T *old_size, void **ret ) +{ + SUBHEAP *subheap = block_get_subheap( heap, block ); + struct block *next;
if (block_size > old_block_size) { - if (!(next = next_block( subheap, block )) || !(block_get_flags( next ) & BLOCK_FLAG_FREE) || - block_size > old_block_size + block_get_size( next ) || !subheap_commit( heap, subheap, block, block_size )) - { - heap_unlock( heap, flags ); - return STATUS_NO_MEMORY; - } + /* need to grow block, make sure it's followed by large enough free block */ + if (!(next = next_block( subheap, block ))) return STATUS_NO_MEMORY; + if (!(block_get_flags( next ) & BLOCK_FLAG_FREE)) return STATUS_NO_MEMORY; + if (block_size > old_block_size + block_get_size( next )) return STATUS_NO_MEMORY; + if (!subheap_commit( heap, subheap, block, block_size )) return STATUS_NO_MEMORY; }
if ((next = next_block( subheap, block )) && (block_get_flags( next ) & BLOCK_FLAG_FREE)) { /* merge with next block if it is free */ - entry = (struct entry *)next; + struct entry *entry = (struct entry *)next; list_remove( &entry->entry ); old_block_size += block_get_size( next ); } @@ -1836,9 +1826,30 @@ static NTSTATUS heap_resize_block( struct heap *heap, ULONG flags, struct block
if ((next = next_block( subheap, block ))) block_set_flags( next, BLOCK_FLAG_PREV_FREE, 0 );
+ *ret = block + 1; + return STATUS_SUCCESS; +} + +static NTSTATUS heap_resize_in_place( struct heap *heap, ULONG flags, struct block *block, SIZE_T block_size, + SIZE_T size, SIZE_T *old_size, void **ret ) +{ + SIZE_T old_bin, old_block_size; + NTSTATUS status; + + if (block_get_flags( block ) & BLOCK_FLAG_LARGE) + return heap_resize_large( heap, flags, block, block_size, size, old_size, ret ); + + old_block_size = block_get_size( block ); + *old_size = old_block_size - block_get_overhead( block ); + old_bin = BLOCK_SIZE_BIN( old_block_size ); + + if (block_size >= HEAP_MIN_LARGE_BLOCK_SIZE) return STATUS_NO_MEMORY; /* growing small block to large block */ + + heap_lock( heap, flags ); + status = heap_resize_block( heap, flags, block, block_size, size, old_block_size, old_size, ret ); heap_unlock( heap, flags );
- if (heap->bins) + if (!status && heap->bins) { SIZE_T new_bin = BLOCK_SIZE_BIN( block_size ); InterlockedIncrement( &heap->bins[old_bin].count_freed ); @@ -1846,8 +1857,7 @@ static NTSTATUS heap_resize_block( struct heap *heap, ULONG flags, struct block if (!ReadNoFence( &heap->bins[new_bin].enabled )) bin_try_enable( heap, &heap->bins[new_bin] ); }
- *ret = block + 1; - return STATUS_SUCCESS; + return status; }
/*********************************************************************** @@ -1870,8 +1880,8 @@ void *WINAPI RtlReAllocateHeap( HANDLE handle, ULONG flags, void *ptr, SIZE_T si status = STATUS_NO_MEMORY; else if (!(block = unsafe_block_from_ptr( heap, heap_flags, ptr ))) status = STATUS_INVALID_PARAMETER; - else if ((status = heap_resize_block( heap, heap_flags, block, block_size, size, - &old_size, &ret ))) + else if ((status = heap_resize_in_place( heap, heap_flags, block, block_size, size, + &old_size, &ret ))) { if (flags & HEAP_REALLOC_IN_PLACE_ONLY) status = STATUS_NO_MEMORY;
From: Rémi Bernon rbernon@codeweavers.com
This implements the reduced fragmentation from the heap frontend, by carving smaller blocks out of larger allocated blocks.
The super block and each sub-block are all flagged with BLOCK_FLAG_LFH.
The super-block (struct group) uses a standard struct block header, as well as a list entry to be linked in free list, and a free bit map to track free sub-blocks.
Sub-blocks reference their super block through the base_offset, instead of the subheap, using the block size as radix. --- dlls/kernel32/tests/heap.c | 20 ++- dlls/ntdll/heap.c | 276 ++++++++++++++++++++++++++++++++++++- 2 files changed, 277 insertions(+), 19 deletions(-)
diff --git a/dlls/kernel32/tests/heap.c b/dlls/kernel32/tests/heap.c index cf4b4fa7058..8c06e9d5f53 100644 --- a/dlls/kernel32/tests/heap.c +++ b/dlls/kernel32/tests/heap.c @@ -986,6 +986,7 @@ static void test_HeapCreate(void) ok( entries[0].cbData <= 0x1000 /* sizeof(*heap) */, "got cbData %#lx\n", entries[0].cbData ); ok( entries[0].cbOverhead == 0, "got cbOverhead %#x\n", entries[0].cbOverhead ); ok( entries[0].iRegionIndex == 0, "got iRegionIndex %d\n", entries[0].iRegionIndex ); + todo_wine /* Wine currently reports the LFH group as a single block here */ ok( entries[1].wFlags == 0, "got wFlags %#x\n", entries[1].wFlags );
for (i = 0; i < 0x12; i++) @@ -1066,6 +1067,7 @@ static void test_HeapCreate(void) for (i = 1; i < count - 2; i++) { if (entries[i].wFlags != PROCESS_HEAP_ENTRY_BUSY) continue; + todo_wine /* Wine currently reports the LFH group as a single block */ ok( entries[i].cbData == 0x18 + 2 * sizeof(void *), "got cbData %#lx\n", entries[i].cbData ); ok( entries[i].cbOverhead == 0x8, "got cbOverhead %#x\n", entries[i].cbOverhead ); } @@ -1691,9 +1693,7 @@ static void test_GlobalAlloc(void) ok( size == small_size, "GlobalSize returned %Iu\n", size ); SetLastError( 0xdeadbeef ); tmp_mem = GlobalReAlloc( mem, small_size, 0 ); - todo_wine ok( !tmp_mem, "GlobalReAlloc succeeded\n" ); - todo_wine ok( GetLastError() == ERROR_NOT_ENOUGH_MEMORY, "got error %lu\n", GetLastError() ); if (tmp_mem) mem = tmp_mem; tmp_mem = GlobalReAlloc( mem, 1024 * 1024, GMEM_MODIFY ); @@ -1709,7 +1709,6 @@ static void test_GlobalAlloc(void) ok( !!mem, "GlobalAlloc failed, error %lu\n", GetLastError() ); tmp_mem = GlobalReAlloc( mem, small_size + 1, GMEM_MOVEABLE ); ok( !!tmp_mem, "GlobalReAlloc failed, error %lu\n", GetLastError() ); - todo_wine ok( tmp_mem != mem, "GlobalReAlloc didn't relocate memory\n" ); ptr = GlobalLock( tmp_mem ); ok( !!ptr, "GlobalLock failed, error %lu\n", GetLastError() ); @@ -1856,8 +1855,8 @@ static void test_GlobalAlloc(void) { ok( !is_mem_entry( tmp_mem ), "unexpected moveable %p\n", tmp_mem ); if (flags == GMEM_MODIFY) ok( tmp_mem == mem, "GlobalReAlloc returned %p\n", tmp_mem ); - else if (flags != GMEM_MOVEABLE) todo_wine_if(!flags) ok( !tmp_mem, "GlobalReAlloc succeeded\n" ); - else todo_wine ok( tmp_mem != mem, "GlobalReAlloc returned %p\n", tmp_mem ); + else if (flags != GMEM_MOVEABLE) ok( !tmp_mem, "GlobalReAlloc succeeded\n" ); + else ok( tmp_mem != mem, "GlobalReAlloc returned %p\n", tmp_mem ); } else { @@ -1871,7 +1870,7 @@ static void test_GlobalAlloc(void)
size = GlobalSize( mem ); if (flags == GMEM_MOVEABLE) ok( size == 0 || broken( size == 1 ) /* w7 */, "GlobalSize returned %Iu\n", size ); - else todo_wine_if(!flags) ok( size == small_size, "GlobalSize returned %Iu\n", size ); + else ok( size == small_size, "GlobalSize returned %Iu\n", size );
mem = GlobalFree( mem ); ok( !mem, "GlobalFree failed, error %lu\n", GetLastError() ); @@ -2429,9 +2428,7 @@ static void test_LocalAlloc(void) ok( size == small_size, "LocalSize returned %Iu\n", size ); SetLastError( 0xdeadbeef ); tmp_mem = LocalReAlloc( mem, small_size, 0 ); - todo_wine ok( !tmp_mem, "LocalReAlloc succeeded\n" ); - todo_wine ok( GetLastError() == ERROR_NOT_ENOUGH_MEMORY, "got error %lu\n", GetLastError() ); if (tmp_mem) mem = tmp_mem; tmp_mem = LocalReAlloc( mem, 1024 * 1024, LMEM_MODIFY ); @@ -2447,7 +2444,6 @@ static void test_LocalAlloc(void) ok( !!mem, "LocalAlloc failed, error %lu\n", GetLastError() ); tmp_mem = LocalReAlloc( mem, small_size + 1, LMEM_MOVEABLE ); ok( !!tmp_mem, "LocalReAlloc failed, error %lu\n", GetLastError() ); - todo_wine ok( tmp_mem != mem, "LocalReAlloc didn't relocate memory\n" ); ptr = LocalLock( tmp_mem ); ok( !!ptr, "LocalLock failed, error %lu\n", GetLastError() ); @@ -2540,13 +2536,13 @@ static void test_LocalAlloc(void) tmp_mem = LocalReAlloc( mem, 0, flags ); ok( !is_mem_entry( tmp_mem ), "unexpected moveable %p\n", tmp_mem ); if (flags & LMEM_MODIFY) ok( tmp_mem == mem, "LocalReAlloc returned %p\n", tmp_mem ); - else if (flags != LMEM_MOVEABLE) todo_wine_if(!flags) ok( !tmp_mem, "LocalReAlloc succeeded\n" ); - else todo_wine ok( tmp_mem != mem, "LocalReAlloc returned %p\n", tmp_mem ); + else if (flags != LMEM_MOVEABLE) ok( !tmp_mem, "LocalReAlloc succeeded\n" ); + else ok( tmp_mem != mem, "LocalReAlloc returned %p\n", tmp_mem ); if (tmp_mem) mem = tmp_mem;
size = LocalSize( mem ); if (flags == LMEM_MOVEABLE) ok( size == 0 || broken( size == 1 ) /* w7 */, "LocalSize returned %Iu\n", size ); - else todo_wine_if(!flags) ok( size == small_size, "LocalSize returned %Iu\n", size ); + else ok( size == small_size, "LocalSize returned %Iu\n", size );
mem = LocalFree( mem ); ok( !mem, "LocalFree failed, error %lu\n", GetLastError() ); diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index 197217c90a7..1ea2c24a656 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -84,7 +84,7 @@ struct rtl_heap_entry #define REGION_ALIGN 0x10000 #define BLOCK_ALIGN (2 * sizeof(void *))
-struct block +struct DECLSPEC_ALIGN(8) block { /* block size in multiple of BLOCK_ALIGN */ WORD block_size; @@ -104,6 +104,7 @@ C_ASSERT( sizeof(struct block) == 8 ); #define BLOCK_FLAG_PREV_FREE 0x02 #define BLOCK_FLAG_FREE_LINK 0x03 #define BLOCK_FLAG_LARGE 0x04 +#define BLOCK_FLAG_LFH 0x08 /* block is handled by the LFH frontend */ #define BLOCK_FLAG_USER_INFO 0x10 /* user flags up to 0xf0 */ #define BLOCK_FLAG_USER_MASK 0xf0
@@ -261,6 +262,9 @@ struct bin LONG count_alloc; LONG count_freed; LONG enabled; + + /* list of groups with free blocks */ + struct list groups; };
struct heap @@ -1182,7 +1186,9 @@ static BOOL validate_used_block( const struct heap *heap, const SUBHEAP *subheap err = "invalid block size"; else if (block->tail_size > block_get_size( block ) - sizeof(*block)) err = "invalid block unused size"; - else if ((next = next_block( subheap, block )) && (block_get_flags( next ) & BLOCK_FLAG_PREV_FREE)) + else if ((next = next_block( subheap, block )) && (block_get_flags( next ) & BLOCK_FLAG_PREV_FREE) && + /* LFH blocks do not use BLOCK_FLAG_PREV_FREE or back pointer */ + !(block_get_flags( block ) & BLOCK_FLAG_LFH)) err = "invalid next block flags"; else if (block_get_flags( block ) & BLOCK_FLAG_PREV_FREE) { @@ -1319,6 +1325,16 @@ static inline struct block *unsafe_block_from_ptr( struct heap *heap, ULONG flag
if ((ULONG_PTR)ptr % BLOCK_ALIGN) err = "invalid ptr alignment"; + else if (block_get_type( block ) == BLOCK_TYPE_DEAD) + err = "delayed freed block"; + else if (block_get_type( block ) == BLOCK_TYPE_FREE) + err = "already freed block"; + else if (block_get_flags( block ) & BLOCK_FLAG_LFH) + { + /* LFH block base_offset points to the group, not the subheap */ + if (block_get_type( block ) != BLOCK_TYPE_USED) + err = "invalid block type"; + } else if ((subheap = block_get_subheap( heap, block )) >= (SUBHEAP *)block) err = "invalid base offset"; else if (block_get_type( block ) == BLOCK_TYPE_USED) @@ -1332,12 +1348,10 @@ static inline struct block *unsafe_block_from_ptr( struct heap *heap, ULONG flag ARENA_LARGE *large = subheap_base( subheap ); if (block != &large->block) err = "invalid large block"; } - else if (block_get_type( block ) == BLOCK_TYPE_DEAD) - err = "delayed freed block"; - else if (block_get_type( block ) == BLOCK_TYPE_FREE) - err = "already freed block"; else + { err = "invalid block type"; + }
if (err) WARN( "heap %p, block %p: %s\n", heap, block, err ); return err ? NULL : block; @@ -1520,6 +1534,8 @@ HANDLE WINAPI RtlCreateHeap( ULONG flags, void *addr, SIZE_T total_size, SIZE_T SIZE_T size = sizeof(struct bin) * BLOCK_SIZE_BIN_COUNT; NtAllocateVirtualMemory( NtCurrentProcess(), (void *)&heap->bins, 0, &size, MEM_COMMIT, PAGE_READWRITE ); + for (i = 0; i < BLOCK_SIZE_BIN_COUNT; ++i) + list_init( &heap->bins[i].groups ); }
/* link it into the per-process heap list */ @@ -1669,6 +1685,222 @@ static NTSTATUS heap_allocate_block( struct heap *heap, ULONG flags, SIZE_T bloc return STATUS_SUCCESS; }
+/* Low Fragmentation Heap frontend */ + +/* header for every LFH block group */ +struct DECLSPEC_ALIGN(BLOCK_ALIGN) group +{ + struct list entry; + /* one bit for each free block and the highest bit for GROUP_FLAG_FREE */ + LONG free_bits; + /* first block of a group, required for alignment */ + struct block first_block; +}; + +#define GROUP_BLOCK_COUNT (sizeof(((struct group *)0)->free_bits) * 8 - 1) +#define GROUP_FLAG_FREE (1u << GROUP_BLOCK_COUNT) +#define GROUP_FREE_BITS_MASK (GROUP_FLAG_FREE - 1) + +static inline UINT block_get_group_index( const struct block *block ) +{ + return block->base_offset; +} + +static inline struct group *block_get_group( const struct block *block ) +{ + SIZE_T block_size = block_get_size( block ); + void *first_block = (char *)block - block_get_group_index( block ) * block_size; + return CONTAINING_RECORD( first_block, struct group, first_block ); +} + +static inline void block_set_group( struct block *block, SIZE_T block_size, const struct group *group ) +{ + SIZE_T offset = (char *)block - (char *)&group->first_block; + block->base_offset = offset / block_size; +} + +static inline struct block *group_get_block( struct group *group, SIZE_T block_size, UINT index ) +{ + char *first_block = (char *)&group->first_block; + return (struct block *)(first_block + index * block_size); +} + +/* lookup a free block using the group free_bits, the current thread must own the group */ +static inline struct block *group_find_free_block( struct group *group, SIZE_T block_size ) +{ + ULONG i, free_bits = group->free_bits; + /* free_bits will never be 0 as the group is unlinked when it's fully used */ + BitScanForward( &i, free_bits ); + group->free_bits &= ~(1 << i); + return group_get_block( group, block_size, i ); +} + +/* allocate a new group block using non-LFH allocation, returns a group owned by current thread */ +static struct group *group_allocate( struct heap *heap, ULONG flags, SIZE_T block_size ) +{ + SIZE_T i, group_size, group_block_size; + struct group *group; + NTSTATUS status; + + group_size = offsetof( struct group, first_block ) + GROUP_BLOCK_COUNT * block_size; + group_block_size = heap_get_block_size( heap, flags, group_size ); + + if (group_block_size >= HEAP_MIN_LARGE_BLOCK_SIZE) + status = heap_allocate_large( heap, flags & ~HEAP_ZERO_MEMORY, group_block_size, group_size, (void **)&group ); + else + status = heap_allocate_block( heap, flags & ~HEAP_ZERO_MEMORY, group_block_size, group_size, (void **)&group ); + + if (status) return NULL; + + block_set_flags( (struct block *)group - 1, 0, BLOCK_FLAG_LFH ); + group->free_bits = ~GROUP_FLAG_FREE; + + for (i = 0; i < GROUP_BLOCK_COUNT; ++i) + { + struct block *block = group_get_block( group, block_size, i ); + valgrind_make_writable( block, sizeof(*block) ); + block_set_type( block, BLOCK_TYPE_FREE ); + block_set_flags( block, ~0, BLOCK_FLAG_FREE | BLOCK_FLAG_LFH ); + block_set_group( block, block_size, group ); + block_set_size( block, block_size ); + mark_block_free( block + 1, (char *)block + block_size - (char *)(block + 1), flags ); + } + + return group; +} + +/* release a fully freed group to the non-LFH subheap, group must be owned by current thread */ +static NTSTATUS group_release( struct heap *heap, ULONG flags, struct bin *bin, struct group *group ) +{ + struct block *block = (struct block *)group - 1; + NTSTATUS status; + + block_set_flags( block, BLOCK_FLAG_LFH, 0 ); + + if (block_get_flags( block ) & BLOCK_FLAG_LARGE) + status = heap_free_large( heap, flags, block ); + else + status = heap_free_block( heap, flags, block ); + + return status; +} + +/* acquire a group from the bin, thread takes ownership of a shared group or allocates a new one */ +static struct group *heap_acquire_bin_group( struct heap *heap, ULONG flags, SIZE_T block_size, struct bin *bin ) +{ + struct group *group; + struct list *entry; + + if ((entry = list_head( &bin->groups ))) + { + group = LIST_ENTRY( entry, struct group, entry ); + list_remove( &group->entry ); + return group; + } + + return group_allocate( heap, flags, block_size ); +} + +/* release a thread owned and fully freed group to the bin shared group, or free its memory */ +static NTSTATUS heap_release_bin_group( struct heap *heap, ULONG flags, struct bin *bin, struct group *group ) +{ + /* try re-using the block group instead of releasing it */ + if (list_empty( &bin->groups )) + { + list_add_tail( &bin->groups, &group->entry ); + return STATUS_SUCCESS; + } + + return group_release( heap, flags, bin, group ); +} + +static struct block *find_free_bin_block( struct heap *heap, ULONG flags, SIZE_T block_size, struct bin *bin ) +{ + struct block *block; + struct group *group; + + /* acquire a group, the thread will own it and no other thread can clear free bits. + * some other thread might still set the free bits if they are freeing blocks. + */ + if (!(group = heap_acquire_bin_group( heap, flags, block_size, bin ))) return NULL; + block = group_find_free_block( group, block_size ); + + /* serialize with heap_free_block_lfh: atomically set GROUP_FLAG_FREE when the free bits are all 0. */ + if (!group->free_bits) + group->free_bits = GROUP_FLAG_FREE; + else + { + /* if GROUP_FLAG_FREE isn't set, thread is responsible for putting it back into group list. */ + list_add_tail( &bin->groups, &group->entry ); + } + + return block; +} + +static NTSTATUS heap_allocate_block_lfh( struct heap *heap, ULONG flags, SIZE_T block_size, + SIZE_T size, void **ret ) +{ + struct bin *bin, *last = heap->bins + BLOCK_SIZE_BIN_COUNT - 1; + struct block *block; + + bin = heap->bins + BLOCK_SIZE_BIN( block_size ); + if (bin == last) return STATUS_UNSUCCESSFUL; + + /* paired with WriteRelease in bin_try_enable. */ + if (!ReadAcquire( &bin->enabled )) return STATUS_UNSUCCESSFUL; + + block_size = BLOCK_BIN_SIZE( BLOCK_SIZE_BIN( block_size ) ); + + heap_lock( heap, flags ); + if ((block = find_free_bin_block( heap, flags, block_size, bin ))) + { + block_set_type( block, BLOCK_TYPE_USED ); + block_set_flags( block, ~BLOCK_FLAG_LFH, BLOCK_USER_FLAGS( flags ) ); + block->tail_size = block_size - sizeof(*block) - size; + initialize_block( block, 0, size, flags ); + mark_block_tail( block, flags ); + *ret = block + 1; + } + heap_unlock( heap, flags ); + + return block ? STATUS_SUCCESS : STATUS_NO_MEMORY; +} + +static NTSTATUS heap_free_block_lfh( struct heap *heap, ULONG flags, struct block *block ) +{ + struct bin *bin, *last = heap->bins + BLOCK_SIZE_BIN_COUNT - 1; + SIZE_T i, block_size = block_get_size( block ); + struct group *group = block_get_group( block ); + NTSTATUS status = STATUS_SUCCESS; + + if (!(block_get_flags( block ) & BLOCK_FLAG_LFH)) return STATUS_UNSUCCESSFUL; + + bin = heap->bins + BLOCK_SIZE_BIN( block_size ); + if (bin == last) return STATUS_UNSUCCESSFUL; + + i = block_get_group_index( block ); + valgrind_make_writable( block, sizeof(*block) ); + block_set_type( block, BLOCK_TYPE_FREE ); + block_set_flags( block, ~BLOCK_FLAG_LFH, BLOCK_FLAG_FREE ); + mark_block_free( block + 1, (char *)block + block_size - (char *)(block + 1), flags ); + + heap_lock( heap, flags ); + + group->free_bits |= (1 << i); + + /* if this was the last used block in a group and GROUP_FLAG_FREE was set */ + if (group->free_bits == (GROUP_FREE_BITS_MASK | GROUP_FLAG_FREE)) + { + /* thread now owns the group, and can release it to its bin */ + group->free_bits = ~GROUP_FLAG_FREE; + status = heap_release_bin_group( heap, flags, bin, group ); + } + + heap_unlock( heap, flags ); + + return status; +} + static void bin_try_enable( struct heap *heap, struct bin *bin ) { ULONG alloc = ReadNoFence( &bin->count_alloc ), freed = ReadNoFence( &bin->count_freed ); @@ -1687,7 +1919,15 @@ static void bin_try_enable( struct heap *heap, struct bin *bin ) RtlSetHeapInformation( heap, HeapCompatibilityInformation, &info, sizeof(info) ); }
- WriteNoFence( &bin->enabled, TRUE ); + /* paired with ReadAcquire in heap_allocate_block_lfh. + * + * The acq/rel barrier on the enabled flag is protecting compat_info + * (i.e. compat_info := LFH happens-before enabled := TRUE), so that + * a caller that observes LFH block allocation (alloc request + * succeeds without heap lock) will never observe HEAP_STD when it + * queries the heap. + */ + WriteRelease( &bin->enabled, TRUE ); }
/*********************************************************************** @@ -1707,6 +1947,8 @@ void *WINAPI DECLSPEC_HOTPATCH RtlAllocateHeap( HANDLE handle, ULONG flags, SIZE status = STATUS_NO_MEMORY; else if (block_size >= HEAP_MIN_LARGE_BLOCK_SIZE) status = heap_allocate_large( heap, heap_flags, block_size, size, &ptr ); + else if (heap->bins && !heap_allocate_block_lfh( heap, heap_flags, block_size, size, &ptr )) + status = STATUS_SUCCESS; else { heap_lock( heap, heap_flags ); @@ -1751,6 +1993,8 @@ BOOLEAN WINAPI DECLSPEC_HOTPATCH RtlFreeHeap( HANDLE handle, ULONG flags, void * status = heap_free_large( heap, heap_flags, block ); else if (!(block = heap_delay_free( heap, heap_flags, block ))) status = STATUS_SUCCESS; + else if (!heap_free_block_lfh( heap, heap_flags, block )) + status = STATUS_SUCCESS; else { SIZE_T block_size = block_get_size( block ), bin = BLOCK_SIZE_BIN( block_size ); @@ -1830,6 +2074,21 @@ static NTSTATUS heap_resize_block( struct heap *heap, ULONG flags, struct block return STATUS_SUCCESS; }
+static NTSTATUS heap_resize_block_lfh( struct block *block, ULONG flags, SIZE_T block_size, SIZE_T size, SIZE_T *old_size, void **ret ) +{ + /* as native LFH does it with different block size: refuse to resize even though we could */ + if (ROUND_SIZE( *old_size, BLOCK_ALIGN - 1) != ROUND_SIZE( size, BLOCK_ALIGN - 1)) return STATUS_NO_MEMORY; + if (size >= *old_size) return STATUS_NO_MEMORY; + + block_set_flags( block, BLOCK_FLAG_USER_MASK & ~BLOCK_FLAG_USER_INFO, BLOCK_USER_FLAGS( flags ) ); + block->tail_size = block_size - sizeof(*block) - size; + initialize_block( block, *old_size, size, flags ); + mark_block_tail( block, flags ); + + *ret = block + 1; + return STATUS_SUCCESS; +} + static NTSTATUS heap_resize_in_place( struct heap *heap, ULONG flags, struct block *block, SIZE_T block_size, SIZE_T size, SIZE_T *old_size, void **ret ) { @@ -1845,6 +2104,9 @@ static NTSTATUS heap_resize_in_place( struct heap *heap, ULONG flags, struct blo
if (block_size >= HEAP_MIN_LARGE_BLOCK_SIZE) return STATUS_NO_MEMORY; /* growing small block to large block */
+ if (block_get_flags( block ) & BLOCK_FLAG_LFH) + return heap_resize_block_lfh( block, flags, block_size, size, old_size, ret ); + heap_lock( heap, flags ); status = heap_resize_block( heap, flags, block, block_size, size, old_block_size, old_size, ret ); heap_unlock( heap, flags );
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/kernel32/tests/heap.c | 1 - dlls/ntdll/heap.c | 50 ++++++++++++++++---------------------- 2 files changed, 21 insertions(+), 30 deletions(-)
diff --git a/dlls/kernel32/tests/heap.c b/dlls/kernel32/tests/heap.c index 8c06e9d5f53..b0b56132393 100644 --- a/dlls/kernel32/tests/heap.c +++ b/dlls/kernel32/tests/heap.c @@ -1246,7 +1246,6 @@ static void test_HeapCreate(void) thread_params.flags = 0; SetEvent( thread_params.start_event ); res = WaitForSingleObject( thread_params.ready_event, 100 ); - todo_wine ok( !res, "WaitForSingleObject returned %#lx, error %lu\n", res, GetLastError() ); ret = HeapUnlock( heap ); ok( ret, "HeapUnlock failed, error %lu\n", GetLastError() ); diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index 1ea2c24a656..b2d08a1de7d 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -264,7 +264,7 @@ struct bin LONG enabled;
/* list of groups with free blocks */ - struct list groups; + SLIST_HEADER groups; };
struct heap @@ -1535,7 +1535,7 @@ HANDLE WINAPI RtlCreateHeap( ULONG flags, void *addr, SIZE_T total_size, SIZE_T NtAllocateVirtualMemory( NtCurrentProcess(), (void *)&heap->bins, 0, &size, MEM_COMMIT, PAGE_READWRITE ); for (i = 0; i < BLOCK_SIZE_BIN_COUNT; ++i) - list_init( &heap->bins[i].groups ); + RtlInitializeSListHead( &heap->bins[i].groups ); }
/* link it into the per-process heap list */ @@ -1690,7 +1690,7 @@ static NTSTATUS heap_allocate_block( struct heap *heap, ULONG flags, SIZE_T bloc /* header for every LFH block group */ struct DECLSPEC_ALIGN(BLOCK_ALIGN) group { - struct list entry; + SLIST_ENTRY entry; /* one bit for each free block and the highest bit for GROUP_FLAG_FREE */ LONG free_bits; /* first block of a group, required for alignment */ @@ -1699,7 +1699,6 @@ struct DECLSPEC_ALIGN(BLOCK_ALIGN) group
#define GROUP_BLOCK_COUNT (sizeof(((struct group *)0)->free_bits) * 8 - 1) #define GROUP_FLAG_FREE (1u << GROUP_BLOCK_COUNT) -#define GROUP_FREE_BITS_MASK (GROUP_FLAG_FREE - 1)
static inline UINT block_get_group_index( const struct block *block ) { @@ -1728,10 +1727,10 @@ static inline struct block *group_get_block( struct group *group, SIZE_T block_s /* lookup a free block using the group free_bits, the current thread must own the group */ static inline struct block *group_find_free_block( struct group *group, SIZE_T block_size ) { - ULONG i, free_bits = group->free_bits; + ULONG i, free_bits = ReadNoFence( &group->free_bits ); /* free_bits will never be 0 as the group is unlinked when it's fully used */ BitScanForward( &i, free_bits ); - group->free_bits &= ~(1 << i); + InterlockedAnd( &group->free_bits, ~(1 << i) ); return group_get_block( group, block_size, i ); }
@@ -1745,11 +1744,15 @@ static struct group *group_allocate( struct heap *heap, ULONG flags, SIZE_T bloc group_size = offsetof( struct group, first_block ) + GROUP_BLOCK_COUNT * block_size; group_block_size = heap_get_block_size( heap, flags, group_size );
+ heap_lock( heap, flags ); + if (group_block_size >= HEAP_MIN_LARGE_BLOCK_SIZE) status = heap_allocate_large( heap, flags & ~HEAP_ZERO_MEMORY, group_block_size, group_size, (void **)&group ); else status = heap_allocate_block( heap, flags & ~HEAP_ZERO_MEMORY, group_block_size, group_size, (void **)&group );
+ heap_unlock( heap, flags ); + if (status) return NULL;
block_set_flags( (struct block *)group - 1, 0, BLOCK_FLAG_LFH ); @@ -1775,6 +1778,8 @@ static NTSTATUS group_release( struct heap *heap, ULONG flags, struct bin *bin, struct block *block = (struct block *)group - 1; NTSTATUS status;
+ heap_lock( heap, flags ); + block_set_flags( block, BLOCK_FLAG_LFH, 0 );
if (block_get_flags( block ) & BLOCK_FLAG_LARGE) @@ -1782,21 +1787,18 @@ static NTSTATUS group_release( struct heap *heap, ULONG flags, struct bin *bin, else status = heap_free_block( heap, flags, block );
+ heap_unlock( heap, flags ); + return status; }
/* acquire a group from the bin, thread takes ownership of a shared group or allocates a new one */ static struct group *heap_acquire_bin_group( struct heap *heap, ULONG flags, SIZE_T block_size, struct bin *bin ) { - struct group *group; - struct list *entry; + SLIST_ENTRY *entry;
- if ((entry = list_head( &bin->groups ))) - { - group = LIST_ENTRY( entry, struct group, entry ); - list_remove( &group->entry ); - return group; - } + if ((entry = RtlInterlockedPopEntrySList( &bin->groups ))) + return CONTAINING_RECORD( entry, struct group, entry );
return group_allocate( heap, flags, block_size ); } @@ -1805,9 +1807,9 @@ static struct group *heap_acquire_bin_group( struct heap *heap, ULONG flags, SIZ static NTSTATUS heap_release_bin_group( struct heap *heap, ULONG flags, struct bin *bin, struct group *group ) { /* try re-using the block group instead of releasing it */ - if (list_empty( &bin->groups )) + if (!RtlQueryDepthSList( &bin->groups )) { - list_add_tail( &bin->groups, &group->entry ); + RtlInterlockedPushEntrySList( &bin->groups, &group->entry ); return STATUS_SUCCESS; }
@@ -1826,12 +1828,10 @@ static struct block *find_free_bin_block( struct heap *heap, ULONG flags, SIZE_T block = group_find_free_block( group, block_size );
/* serialize with heap_free_block_lfh: atomically set GROUP_FLAG_FREE when the free bits are all 0. */ - if (!group->free_bits) - group->free_bits = GROUP_FLAG_FREE; - else + if (ReadNoFence( &group->free_bits ) || InterlockedCompareExchange( &group->free_bits, GROUP_FLAG_FREE, 0 )) { /* if GROUP_FLAG_FREE isn't set, thread is responsible for putting it back into group list. */ - list_add_tail( &bin->groups, &group->entry ); + RtlInterlockedPushEntrySList( &bin->groups, &group->entry ); }
return block; @@ -1851,7 +1851,6 @@ static NTSTATUS heap_allocate_block_lfh( struct heap *heap, ULONG flags, SIZE_T
block_size = BLOCK_BIN_SIZE( BLOCK_SIZE_BIN( block_size ) );
- heap_lock( heap, flags ); if ((block = find_free_bin_block( heap, flags, block_size, bin ))) { block_set_type( block, BLOCK_TYPE_USED ); @@ -1861,7 +1860,6 @@ static NTSTATUS heap_allocate_block_lfh( struct heap *heap, ULONG flags, SIZE_T mark_block_tail( block, flags ); *ret = block + 1; } - heap_unlock( heap, flags );
return block ? STATUS_SUCCESS : STATUS_NO_MEMORY; } @@ -1884,20 +1882,14 @@ static NTSTATUS heap_free_block_lfh( struct heap *heap, ULONG flags, struct bloc block_set_flags( block, ~BLOCK_FLAG_LFH, BLOCK_FLAG_FREE ); mark_block_free( block + 1, (char *)block + block_size - (char *)(block + 1), flags );
- heap_lock( heap, flags ); - - group->free_bits |= (1 << i); - /* if this was the last used block in a group and GROUP_FLAG_FREE was set */ - if (group->free_bits == (GROUP_FREE_BITS_MASK | GROUP_FLAG_FREE)) + if (InterlockedOr( &group->free_bits, 1 << i ) == ~(1 << i)) { /* thread now owns the group, and can release it to its bin */ group->free_bits = ~GROUP_FLAG_FREE; status = heap_release_bin_group( heap, flags, bin, group ); }
- heap_unlock( heap, flags ); - return status; }
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/heap.c | 90 +++++++++++++++++++++++++++++++++++++++-- dlls/ntdll/loader.c | 2 + dlls/ntdll/ntdll_misc.h | 1 + 3 files changed, 89 insertions(+), 4 deletions(-)
diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c index b2d08a1de7d..afe38eddaa8 100644 --- a/dlls/ntdll/heap.c +++ b/dlls/ntdll/heap.c @@ -255,6 +255,9 @@ C_ASSERT( BLOCK_BIN_SIZE( 0x80 ) == ~(SIZE_T)0 ); /* difference between block classes and all possible validation overhead must fit into block tail_size */ C_ASSERT( BIN_SIZE_STEP_7 + 3 * BLOCK_ALIGN <= FIELD_MAX( struct block, tail_size ) );
+static BYTE affinity_mapping[] = {20,6,31,15,14,29,27,4,18,24,26,13,0,9,2,30,17,7,23,25,10,19,12,3,22,21,5,16,1,28,11,8}; +static LONG next_thread_affinity; + /* a bin, tracking heap blocks of a certain size */ struct bin { @@ -265,8 +268,20 @@ struct bin
/* list of groups with free blocks */ SLIST_HEADER groups; + + /* array of affinity reserved groups, interleaved with other bins to keep + * all pointers of the same affinity and different bin grouped together, + * and pointers of the same bin and different affinity away from each other, + * hopefully in separate cache lines. + */ + struct group **affinity_group_base; };
+static inline struct group **bin_get_affinity_group( struct bin *bin, BYTE affinity ) +{ + return bin->affinity_group_base + affinity * BLOCK_SIZE_BIN_COUNT; +} + struct heap { /* win32/win64 */ DWORD_PTR unknown1[2]; /* 0000/0000 */ @@ -1531,11 +1546,16 @@ HANDLE WINAPI RtlCreateHeap( ULONG flags, void *addr, SIZE_T total_size, SIZE_T
if (heap->flags & HEAP_GROWABLE) { - SIZE_T size = sizeof(struct bin) * BLOCK_SIZE_BIN_COUNT; + SIZE_T size = (sizeof(struct bin) + sizeof(struct group *) * ARRAY_SIZE(affinity_mapping)) * BLOCK_SIZE_BIN_COUNT; NtAllocateVirtualMemory( NtCurrentProcess(), (void *)&heap->bins, 0, &size, MEM_COMMIT, PAGE_READWRITE ); - for (i = 0; i < BLOCK_SIZE_BIN_COUNT; ++i) + + for (i = 0; heap->bins && i < BLOCK_SIZE_BIN_COUNT; ++i) + { RtlInitializeSListHead( &heap->bins[i].groups ); + /* offset affinity_group_base to interleave the bin affinity group pointers */ + heap->bins[i].affinity_group_base = (struct group **)(heap->bins + BLOCK_SIZE_BIN_COUNT) + i; + } }
/* link it into the per-process heap list */ @@ -1693,6 +1713,8 @@ struct DECLSPEC_ALIGN(BLOCK_ALIGN) group SLIST_ENTRY entry; /* one bit for each free block and the highest bit for GROUP_FLAG_FREE */ LONG free_bits; + /* affinity of the thread which last allocated from this group */ + LONG affinity; /* first block of a group, required for alignment */ struct block first_block; }; @@ -1792,11 +1814,30 @@ static NTSTATUS group_release( struct heap *heap, ULONG flags, struct bin *bin, return status; }
+static inline ULONG heap_current_thread_affinity(void) +{ + ULONG affinity; + + if (!(affinity = NtCurrentTeb()->HeapVirtualAffinity)) + { + affinity = InterlockedIncrement( &next_thread_affinity ); + affinity = affinity_mapping[affinity % ARRAY_SIZE(affinity_mapping)]; + NtCurrentTeb()->HeapVirtualAffinity = affinity; + } + + return affinity; +} + /* acquire a group from the bin, thread takes ownership of a shared group or allocates a new one */ static struct group *heap_acquire_bin_group( struct heap *heap, ULONG flags, SIZE_T block_size, struct bin *bin ) { + ULONG affinity = NtCurrentTeb()->HeapVirtualAffinity; + struct group *group; SLIST_ENTRY *entry;
+ if ((group = InterlockedExchangePointer( (void *)bin_get_affinity_group( bin, affinity ), NULL ))) + return group; + if ((entry = RtlInterlockedPopEntrySList( &bin->groups ))) return CONTAINING_RECORD( entry, struct group, entry );
@@ -1806,8 +1847,16 @@ static struct group *heap_acquire_bin_group( struct heap *heap, ULONG flags, SIZ /* release a thread owned and fully freed group to the bin shared group, or free its memory */ static NTSTATUS heap_release_bin_group( struct heap *heap, ULONG flags, struct bin *bin, struct group *group ) { + ULONG affinity = group->affinity; + + /* using InterlockedExchangePointer here would possibly return a group that has used blocks, + * we prefer keeping our fully freed group instead for reduced memory consumption. + */ + if (!InterlockedCompareExchangePointer( (void *)bin_get_affinity_group( bin, affinity ), group, NULL )) + return STATUS_SUCCESS; + /* try re-using the block group instead of releasing it */ - if (!RtlQueryDepthSList( &bin->groups )) + if (RtlQueryDepthSList( &bin->groups ) <= ARRAY_SIZE(affinity_mapping)) { RtlInterlockedPushEntrySList( &bin->groups, &group->entry ); return STATUS_SUCCESS; @@ -1818,6 +1867,7 @@ static NTSTATUS heap_release_bin_group( struct heap *heap, ULONG flags, struct b
static struct block *find_free_bin_block( struct heap *heap, ULONG flags, SIZE_T block_size, struct bin *bin ) { + ULONG affinity = heap_current_thread_affinity(); struct block *block; struct group *group;
@@ -1825,13 +1875,16 @@ static struct block *find_free_bin_block( struct heap *heap, ULONG flags, SIZE_T * some other thread might still set the free bits if they are freeing blocks. */ if (!(group = heap_acquire_bin_group( heap, flags, block_size, bin ))) return NULL; + group->affinity = affinity; + block = group_find_free_block( group, block_size );
/* serialize with heap_free_block_lfh: atomically set GROUP_FLAG_FREE when the free bits are all 0. */ if (ReadNoFence( &group->free_bits ) || InterlockedCompareExchange( &group->free_bits, GROUP_FLAG_FREE, 0 )) { /* if GROUP_FLAG_FREE isn't set, thread is responsible for putting it back into group list. */ - RtlInterlockedPushEntrySList( &bin->groups, &group->entry ); + if ((group = InterlockedExchangePointer( (void *)bin_get_affinity_group( bin, affinity ), group ))) + RtlInterlockedPushEntrySList( &bin->groups, &group->entry ); }
return block; @@ -1922,6 +1975,35 @@ static void bin_try_enable( struct heap *heap, struct bin *bin ) WriteRelease( &bin->enabled, TRUE ); }
+static void heap_thread_detach_bin_groups( struct heap *heap ) +{ + ULONG i, affinity = NtCurrentTeb()->HeapVirtualAffinity; + + if (!heap->bins) return; + + for (i = 0; i < BLOCK_SIZE_BIN_COUNT; ++i) + { + struct bin *bin = heap->bins + i; + struct group *group; + if (!(group = InterlockedExchangePointer( (void *)bin_get_affinity_group( bin, affinity ), NULL ))) continue; + RtlInterlockedPushEntrySList( &bin->groups, &group->entry ); + } +} + +void heap_thread_detach(void) +{ + struct heap *heap; + + RtlEnterCriticalSection( &process_heap->cs ); + + LIST_FOR_EACH_ENTRY( heap, &process_heap->entry, struct heap, entry ) + heap_thread_detach_bin_groups( heap ); + + heap_thread_detach_bin_groups( process_heap ); + + RtlLeaveCriticalSection( &process_heap->cs ); +} + /*********************************************************************** * RtlAllocateHeap (NTDLL.@) */ diff --git a/dlls/ntdll/loader.c b/dlls/ntdll/loader.c index 01a30742678..6c643575f8d 100644 --- a/dlls/ntdll/loader.c +++ b/dlls/ntdll/loader.c @@ -3737,6 +3737,8 @@ void WINAPI LdrShutdownThread(void) /* don't call DbgUiGetThreadDebugObject as some apps hook it and terminate if called */ if (NtCurrentTeb()->DbgSsReserved[1]) NtClose( NtCurrentTeb()->DbgSsReserved[1] ); RtlFreeThreadActivationContextStack(); + + heap_thread_detach(); }
diff --git a/dlls/ntdll/ntdll_misc.h b/dlls/ntdll/ntdll_misc.h index d1a7790991b..f6b77b79cde 100644 --- a/dlls/ntdll/ntdll_misc.h +++ b/dlls/ntdll/ntdll_misc.h @@ -127,5 +127,6 @@ static inline void ascii_to_unicode( WCHAR *dst, const char *src, size_t len )
/* FLS data */ extern TEB_FLS_DATA *fls_alloc_data(void) DECLSPEC_HIDDEN; +extern void heap_thread_detach(void) DECLSPEC_HIDDEN;
#endif
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=129426
Your paranoid android.
=== debian11 (32 bit report) ===
d3d11: d3d11.c:25778: Test failed: Got 0, expected 33 (index 1). d3d11.c:25778: Test failed: Got 0, expected 66 (index 2). d3d11.c:25778: Test failed: Got 0, expected 99 (index 3). d3d11.c:25778: Test failed: Got 0, expected 132 (index 4). d3d11.c:25778: Test failed: Got 0, expected 165 (index 5). d3d11.c:25778: Test failed: Got 0, expected 198 (index 6). d3d11.c:25778: Test failed: Got 0, expected 231 (index 7). d3d11.c:25778: Test failed: Got 0, expected 264 (index 8). d3d11.c:25778: Test failed: Got 0, expected 297 (index 9). d3d11.c:25778: Test failed: Got 0, expected 330 (index 10). d3d11.c:25778: Test failed: Got 0, expected 363 (index 11). d3d11.c:25778: Test failed: Got 0, expected 396 (index 12). d3d11.c:25778: Test failed: Got 0, expected 429 (index 13). d3d11.c:25778: Test failed: Got 0, expected 462 (index 14). d3d11.c:25778: Test failed: Got 0, expected 495 (index 15). d3d11.c:25778: Test failed: Got 0, expected 528 (index 16). d3d11.c:25778: Test failed: Got 0, expected 561 (index 17). d3d11.c:25778: Test failed: Got 0, expected 594 (index 18). d3d11.c:25778: Test failed: Got 0, expected 627 (index 19). d3d11.c:25778: Test failed: Got 0, expected 660 (index 20). d3d11.c:25778: Test failed: Got 0, expected 693 (index 21). d3d11.c:25778: Test failed: Got 0, expected 726 (index 22). d3d11.c:25778: Test failed: Got 0, expected 759 (index 23). d3d11.c:25778: Test failed: Got 0, expected 792 (index 24). d3d11.c:25778: Test failed: Got 0, expected 825 (index 25). d3d11.c:25778: Test failed: Got 0, expected 858 (index 26). d3d11.c:25778: Test failed: Got 0, expected 891 (index 27). d3d11.c:25778: Test failed: Got 0, expected 924 (index 28). d3d11.c:25778: Test failed: Got 0, expected 957 (index 29). d3d11.c:25778: Test failed: Got 0, expected 990 (index 30). d3d11.c:25778: Test failed: Got 0, expected 1023 (index 31). d3d11.c:25778: Test failed: Got 0, expected 1056 (index 32). d3d11.c:25778: Test failed: Got 0, expected 1089 (index 33). d3d11.c:25778: Test failed: Got 0, expected 1122 (index 34). d3d11.c:25778: Test failed: Got 0, expected 1155 (index 35). d3d11.c:25778: Test failed: Got 0, expected 1188 (index 36). d3d11.c:25778: Test failed: Got 0, expected 1221 (index 37). d3d11.c:25778: Test failed: Got 0, expected 1254 (index 38). d3d11.c:25778: Test failed: Got 0, expected 1287 (index 39). d3d11.c:25778: Test failed: Got 0, expected 1320 (index 40). d3d11.c:25778: Test failed: Got 0, expected 1353 (index 41). d3d11.c:25778: Test failed: Got 0, expected 1386 (index 42). d3d11.c:25778: Test failed: Got 0, expected 1419 (index 43). d3d11.c:25778: Test failed: Got 0, expected 1452 (index 44). d3d11.c:25778: Test failed: Got 0, expected 1485 (index 45). d3d11.c:25778: Test failed: Got 0, expected 1518 (index 46). d3d11.c:25778: Test failed: Got 0, expected 1551 (index 47). d3d11.c:25778: Test failed: Got 0, expected 1584 (index 48). d3d11.c:25778: Test failed: Got 0, expected 1617 (index 49). d3d11.c:25778: Test failed: Got 0, expected 1650 (index 50). d3d11.c:25778: Test failed: Got 0, expected 1683 (index 51). d3d11.c:25778: Test failed: Got 0, expected 1716 (index 52). d3d11.c:25778: Test failed: Got 0, expected 1749 (index 53). d3d11.c:25778: Test failed: Got 0, expected 1782 (index 54). d3d11.c:25778: Test failed: Got 0, expected 1815 (index 55). d3d11.c:25778: Test failed: Got 0, expected 1848 (index 56). d3d11.c:25778: Test failed: Got 0, expected 1881 (index 57). d3d11.c:25778: Test failed: Got 0, expected 1914 (index 58). d3d11.c:25778: Test failed: Got 0, expected 1947 (index 59). d3d11.c:25778: Test failed: Got 0, expected 1980 (index 60). d3d11.c:25778: Test failed: Got 0, expected 2013 (index 61). d3d11.c:25778: Test failed: Got 0, expected 2046 (index 62). d3d11.c:25778: Test failed: Got 0, expected 2079 (index 63). d3d11.c:25810: Test failed: Got 0, expected 32 (index 0). d3d11.c:25810: Test failed: Got 0, expected 96 (index 1). d3d11.c:25810: Test failed: Got 0, expected 160 (index 2). d3d11.c:25810: Test failed: Got 0, expected 224 (index 3). d3d11.c:25810: Test failed: Got 0, expected 288 (index 4). d3d11.c:25810: Test failed: Got 0, expected 352 (index 5). d3d11.c:25810: Test failed: Got 0, expected 416 (index 6). d3d11.c:25810: Test failed: Got 0, expected 480 (index 7). d3d11.c:25810: Test failed: Got 0, expected 544 (index 8). d3d11.c:25810: Test failed: Got 0, expected 608 (index 9). d3d11.c:25810: Test failed: Got 0, expected 672 (index 10). d3d11.c:25810: Test failed: Got 0, expected 736 (index 11). d3d11.c:25810: Test failed: Got 0, expected 800 (index 12). d3d11.c:25810: Test failed: Got 0, expected 864 (index 13). d3d11.c:25810: Test failed: Got 0, expected 928 (index 14). d3d11.c:25810: Test failed: Got 0, expected 992 (index 15). d3d11.c:25810: Test failed: Got 0, expected 1056 (index 16). d3d11.c:25810: Test failed: Got 0, expected 1120 (index 17). d3d11.c:25810: Test failed: Got 0, expected 1184 (index 18). d3d11.c:25810: Test failed: Got 0, expected 1248 (index 19). d3d11.c:25810: Test failed: Got 0, expected 1312 (index 20). d3d11.c:25810: Test failed: Got 0, expected 1376 (index 21). d3d11.c:25810: Test failed: Got 0, expected 1440 (index 22). d3d11.c:25810: Test failed: Got 0, expected 1504 (index 23). d3d11.c:25810: Test failed: Got 0, expected 1568 (index 24). d3d11.c:25810: Test failed: Got 0, expected 1632 (index 25). d3d11.c:25810: Test failed: Got 0, expected 1696 (index 26). d3d11.c:25810: Test failed: Got 0, expected 1760 (index 27). d3d11.c:25810: Test failed: Got 0, expected 1824 (index 28). d3d11.c:25810: Test failed: Got 0, expected 1888 (index 29). d3d11.c:25810: Test failed: Got 0, expected 1952 (index 30). d3d11.c:25810: Test failed: Got 0, expected 2016 (index 31). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 1 (index 32). d3d11.c:25854: Test failed: Got 0, expected 1 (index 32). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 2 (index 33). d3d11.c:25854: Test failed: Got 0, expected 2 (index 33). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 3 (index 34). d3d11.c:25854: Test failed: Got 0, expected 3 (index 34). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 4 (index 35). d3d11.c:25854: Test failed: Got 0, expected 4 (index 35). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 5 (index 36). d3d11.c:25854: Test failed: Got 0, expected 5 (index 36). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 6 (index 37). d3d11.c:25854: Test failed: Got 0, expected 6 (index 37). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 7 (index 38). d3d11.c:25854: Test failed: Got 0, expected 7 (index 38). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 8 (index 39). d3d11.c:25854: Test failed: Got 0, expected 8 (index 39). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 9 (index 40). d3d11.c:25854: Test failed: Got 0, expected 9 (index 40). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 10 (index 41). d3d11.c:25854: Test failed: Got 0, expected 10 (index 41). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 11 (index 42). d3d11.c:25854: Test failed: Got 0, expected 11 (index 42). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 12 (index 43). d3d11.c:25854: Test failed: Got 0, expected 12 (index 43). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 13 (index 44). d3d11.c:25854: Test failed: Got 0, expected 13 (index 44). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 14 (index 45). d3d11.c:25854: Test failed: Got 0, expected 14 (index 45). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 15 (index 46). d3d11.c:25854: Test failed: Got 0, expected 15 (index 46). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 16 (index 47). d3d11.c:25854: Test failed: Got 0, expected 16 (index 47). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 17 (index 48). d3d11.c:25854: Test failed: Got 0, expected 17 (index 48). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 18 (index 49). d3d11.c:25854: Test failed: Got 0, expected 18 (index 49). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 19 (index 50). d3d11.c:25854: Test failed: Got 0, expected 19 (index 50). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 20 (index 51). d3d11.c:25854: Test failed: Got 0, expected 20 (index 51). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 21 (index 52). d3d11.c:25854: Test failed: Got 0, expected 21 (index 52). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 22 (index 53). d3d11.c:25854: Test failed: Got 0, expected 22 (index 53). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 23 (index 54). d3d11.c:25854: Test failed: Got 0, expected 23 (index 54). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 24 (index 55). d3d11.c:25854: Test failed: Got 0, expected 24 (index 55). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 25 (index 56). d3d11.c:25854: Test failed: Got 0, expected 25 (index 56). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 26 (index 57). d3d11.c:25854: Test failed: Got 0, expected 26 (index 57). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 27 (index 58). d3d11.c:25854: Test failed: Got 0, expected 27 (index 58). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 28 (index 59). d3d11.c:25854: Test failed: Got 0, expected 28 (index 59). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 29 (index 60). d3d11.c:25854: Test failed: Got 0, expected 29 (index 60). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 30 (index 61). d3d11.c:25854: Test failed: Got 0, expected 30 (index 61). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 31 (index 62). d3d11.c:25854: Test failed: Got 0, expected 31 (index 62). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 32 (index 63). d3d11.c:25854: Test failed: Got 0, expected 32 (index 63). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 2 (index 64). d3d11.c:25854: Test failed: Got 0, expected 2 (index 64). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 4 (index 65). d3d11.c:25854: Test failed: Got 0, expected 4 (index 65). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 6 (index 66). d3d11.c:25854: Test failed: Got 0, expected 6 (index 66). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 8 (index 67). d3d11.c:25854: Test failed: Got 0, expected 8 (index 67). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 10 (index 68). d3d11.c:25854: Test failed: Got 0, expected 10 (index 68). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 12 (index 69). d3d11.c:25854: Test failed: Got 0, expected 12 (index 69). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 14 (index 70). d3d11.c:25854: Test failed: Got 0, expected 14 (index 70). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 16 (index 71). d3d11.c:25854: Test failed: Got 0, expected 16 (index 71). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 18 (index 72). d3d11.c:25854: Test failed: Got 0, expected 18 (index 72). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 20 (index 73). d3d11.c:25854: Test failed: Got 0, expected 20 (index 73). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 22 (index 74). d3d11.c:25854: Test failed: Got 0, expected 22 (index 74). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 24 (index 75). d3d11.c:25854: Test failed: Got 0, expected 24 (index 75). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 26 (index 76). d3d11.c:25854: Test failed: Got 0, expected 26 (index 76). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 28 (index 77). d3d11.c:25854: Test failed: Got 0, expected 28 (index 77). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 30 (index 78). d3d11.c:25854: Test failed: Got 0, expected 30 (index 78). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 32 (index 79). d3d11.c:25854: Test failed: Got 0, expected 32 (index 79). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 34 (index 80). d3d11.c:25854: Test failed: Got 0, expected 34 (index 80). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 36 (index 81). d3d11.c:25854: Test failed: Got 0, expected 36 (index 81). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 38 (index 82). d3d11.c:25854: Test failed: Got 0, expected 38 (index 82). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 40 (index 83). d3d11.c:25854: Test failed: Got 0, expected 40 (index 83). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 42 (index 84). d3d11.c:25854: Test failed: Got 0, expected 42 (index 84). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 44 (index 85). d3d11.c:25854: Test failed: Got 0, expected 44 (index 85). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 46 (index 86). d3d11.c:25854: Test failed: Got 0, expected 46 (index 86). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 48 (index 87). d3d11.c:25854: Test failed: Got 0, expected 48 (index 87). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 50 (index 88). d3d11.c:25854: Test failed: Got 0, expected 50 (index 88). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 52 (index 89). d3d11.c:25854: Test failed: Got 0, expected 52 (index 89). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 54 (index 90). d3d11.c:25854: Test failed: Got 0, expected 54 (index 90). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 56 (index 91). d3d11.c:25854: Test failed: Got 0, expected 56 (index 91). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 58 (index 92). d3d11.c:25854: Test failed: Got 0, expected 58 (index 92). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 60 (index 93). d3d11.c:25854: Test failed: Got 0, expected 60 (index 93). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 62 (index 94). d3d11.c:25854: Test failed: Got 0, expected 62 (index 94). d3d11.c:25852: Test failed: Got 0.00000000e+000, expected 64 (index 95). d3d11.c:25854: Test failed: Got 0, expected 64 (index 95).
This helps Overwatch 2 with DXVK, reducing initial game stutter duration from 30s to 15s (measured with `WINEDEBUG=+fps` until the framerate stabilizes in the main menu around ~60fps), as well as in game by increasing average fps from ~130fps to ~230fps while the shaders are compiling.