On both Mac and Linux under Wow64, once roughly 120 threads have been created, the 64-bit stacks start to be allocated above 4GB.
This triggered crashes in alloc_fs_sel() and wherever the result of get_cpu_area() was used. (On Mac the ntdll threadpool tests reproduced this; on both platforms, a test app that created 256 threads also reproduced it.)
From: Brendan Shanks <bshanks@codeweavers.com>
Signed-off-by: Brendan Shanks <bshanks@codeweavers.com>
---
 dlls/ntdll/unix/signal_x86_64.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c
index 34334f72ff0..b3657644b9c 100644
--- a/dlls/ntdll/unix/signal_x86_64.c
+++ b/dlls/ntdll/unix/signal_x86_64.c
@@ -93,8 +93,12 @@ __ASM_GLOBAL_FUNC( alloc_fs_sel,
                    /* switch to 32-bit stack */
                    "pushq %rbx\n\t"
                    "pushq %rdi\n\t"
+                   "pushq %r12\n\t"
+                   "pushq %r13\n\t"
                    "movq %rsp,%rdi\n\t"
-                   "movq %gs:0x8,%rsp\n\t" /* NtCurrentTeb()->Tib.StackBase */
+                   "movq %gs:0x30,%r12\n\t" /* NtCurrentTeb()->Tib.Self */
+                   "movsxd 0x180c(%r12),%r13\n\t" /* NtCurrentTeb()->WowTebOffset */
+                   "movq 0x04(%r12,%r13),%rsp\n\t" /* Wow Tib.StackBase */
                    "subl $0x10,%esp\n\t"
                    /* setup modify_ldt struct on 32-bit stack */
                    "movl %ecx,(%rsp)\n\t" /* entry_number */
@@ -108,6 +112,8 @@ __ASM_GLOBAL_FUNC( alloc_fs_sel,
                    /* restore stack */
                    "movl (%rsp),%eax\n\t" /* entry_number */
                    "movq %rdi,%rsp\n\t"
+                   "popq %r13\n\t"
+                   "popq %r12\n\t"
                    "popq %rdi\n\t"
                    "popq %rbx\n\t"
                    "ret" );
From: Brendan Shanks <bshanks@codeweavers.com>
Signed-off-by: Brendan Shanks <bshanks@codeweavers.com>
---
 dlls/ntdll/unix/thread.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c
index 503230e4634..6d937675bcb 100644
--- a/dlls/ntdll/unix/thread.c
+++ b/dlls/ntdll/unix/thread.c
@@ -1111,7 +1111,7 @@ void *get_cpu_area( USHORT machine )
     case IMAGE_FILE_MACHINE_ARM64: align = TYPE_ALIGNMENT(ARM64_NT_CONTEXT); break;
     default: return NULL;
     }
-    return (void *)(((ULONG_PTR)(cpu + 1) + align - 1) & ~(align - 1));
+    return (void *)(((ULONG_PTR)(cpu + 1) + align - 1) & ~((ULONG_PTR)align - 1));
 }