Add -fasynchronous-unwind-tables to CROSSLDFLAGS when it is available. Otherwise the output_cfi calls in winebuild, for instance in the syscall thunks, are just no-ops.
Signed-off-by: Rémi Bernon <rbernon@codeweavers.com> ---
This series makes it possible for GDB to unwind the stack through the syscall dispatcher. I'm not completely sure it's right; more precisely, I don't know whether PATCH 4 is okay, and whether the kernel stack must be above the user stack.
Sadly, other tools such as perf or valgrind still struggle with what they consider a huge stack frame between the kernel stack and the user stack when NT syscalls are executed. I'm not completely sure how this could be mitigated.
configure.ac | 1 + 1 file changed, 1 insertion(+)
diff --git a/configure.ac b/configure.ac index c0a126293c4..fb5447addd4 100644 --- a/configure.ac +++ b/configure.ac @@ -2105,6 +2105,7 @@ then DLLFLAGS="$DLLFLAGS -fasynchronous-unwind-tables" LDDLLFLAGS="$LDDLLFLAGS -fasynchronous-unwind-tables" UNIXDLLFLAGS="$UNIXDLLFLAGS -fasynchronous-unwind-tables" + CROSSLDFLAGS="$CROSSLDFLAGS -fasynchronous-unwind-tables" else if test "x$enable_win64" = "xyes" then
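Save the flags before popping the return address into the syscall frame, so that pushing the flags doesn't unnecessarily overwrite the return address on the stack.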
Signed-off-by: Rémi Bernon <rbernon@codeweavers.com> --- dlls/ntdll/unix/signal_i386.c | 2 +- dlls/ntdll/unix/signal_x86_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index e123f2c2a79..6d8cd0f3bd0 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2469,9 +2469,9 @@ __ASM_GLOBAL_FUNC( signal_exit_thread, __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl %fs:0x1f8,%ecx\n\t" /* x86_thread_data()->syscall_frame */ "movw $0,0x02(%ecx)\n\t" /* frame->restore_flags */ - "popl 0x08(%ecx)\n\t" /* frame->eip */ "pushfl\n\t" "popl 0x04(%ecx)\n" /* frame->eflags */ + "popl 0x08(%ecx)\n\t" /* frame->eip */ __ASM_NAME("__wine_syscall_dispatcher_prolog_end") ":\n\t" "movl %esp,0x0c(%ecx)\n\t" /* frame->esp */ "movw %cs,0x10(%ecx)\n\t" diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 9b18180301a..e65e256a674 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -3112,9 +3112,9 @@ __ASM_GLOBAL_FUNC( signal_exit_thread, __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq %gs:0x30,%rcx\n\t" "movq 0x328(%rcx),%rcx\n\t" /* amd64_thread_data()->syscall_frame */ - "popq 0x70(%rcx)\n\t" /* frame->rip */ "pushfq\n\t" "popq 0x80(%rcx)\n\t" + "popq 0x70(%rcx)\n\t" /* frame->rip */ "movl $0,0x94(%rcx)\n\t" /* frame->restore_flags */ __ASM_NAME("__wine_syscall_dispatcher_prolog_end") ":\n\t" "movq %rax,0x00(%rcx)\n\t"
Signed-off-by: Rémi Bernon <rbernon@codeweavers.com> --- dlls/ntdll/unix/signal_i386.c | 13 ++++++++++ dlls/ntdll/unix/signal_x86_64.c | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 6d8cd0f3bd0..10908d6752c 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2470,8 +2470,11 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl %fs:0x1f8,%ecx\n\t" /* x86_thread_data()->syscall_frame */ "movw $0,0x02(%ecx)\n\t" /* frame->restore_flags */ "pushfl\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") "popl 0x04(%ecx)\n" /* frame->eflags */ + __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") "popl 0x08(%ecx)\n\t" /* frame->eip */ + __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") __ASM_NAME("__wine_syscall_dispatcher_prolog_end") ":\n\t" "movl %esp,0x0c(%ecx)\n\t" /* frame->esp */ "movw %cs,0x10(%ecx)\n\t" @@ -2486,6 +2489,16 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl %esi,0x30(%ecx)\n\t" "movl %ebp,0x34(%ecx)\n\t" "leal 0x34(%ecx),%ebp\n\t" + __ASM_CFI(".cfi_def_cfa %ebp,0\n\t") + __ASM_CFI(".cfi_rel_offset %eip,-0x2c\n\t") + __ASM_CFI(".cfi_rel_offset %esp,-0x28\n\t") + __ASM_CFI(".cfi_rel_offset %eax,-0x18\n\t") + __ASM_CFI(".cfi_rel_offset %ebx,-0x14\n\t") + __ASM_CFI(".cfi_rel_offset %ecx,-0x10\n\t") + __ASM_CFI(".cfi_rel_offset %edx,-0x0c\n\t") + __ASM_CFI(".cfi_rel_offset %edi,-0x08\n\t") + __ASM_CFI(".cfi_rel_offset %esi,-0x04\n\t") + __ASM_CFI(".cfi_rel_offset %ebp,-0x00\n\t") "leal 4(%esp),%esi\n\t" /* first argument */ "movl %eax,%ebx\n\t" "shrl $8,%ebx\n\t" diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index e65e256a674..a82ecbd22bf 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -3113,8 +3113,11 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq %gs:0x30,%rcx\n\t" "movq 0x328(%rcx),%rcx\n\t" /* amd64_thread_data()->syscall_frame */ "pushfq\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t") "popq 0x80(%rcx)\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") "popq 0x70(%rcx)\n\t" /* frame->rip */ + __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") "movl 
$0,0x94(%rcx)\n\t" /* frame->restore_flags */ __ASM_NAME("__wine_syscall_dispatcher_prolog_end") ":\n\t" "movq %rax,0x00(%rcx)\n\t" @@ -3134,6 +3137,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movw %ss,0x90(%rcx)\n\t" "movw %gs,0x92(%rcx)\n\t" "movq %rbp,0x98(%rcx)\n\t" + __ASM_CFI(".cfi_def_cfa %rcx,0\n\t") + __ASM_CFI(".cfi_rel_offset %rax,0x00\n\t") + __ASM_CFI(".cfi_rel_offset %rbx,0x08\n\t") + __ASM_CFI(".cfi_rel_offset %rcx,0x10\n\t") + __ASM_CFI(".cfi_rel_offset %rdx,0x18\n\t") + __ASM_CFI(".cfi_rel_offset %rsi,0x20\n\t") + __ASM_CFI(".cfi_rel_offset %rdi,0x28\n\t") + __ASM_CFI(".cfi_rel_offset %r12,0x50\n\t") + __ASM_CFI(".cfi_rel_offset %r13,0x58\n\t") + __ASM_CFI(".cfi_rel_offset %r14,0x60\n\t") + __ASM_CFI(".cfi_rel_offset %r15,0x68\n\t") + __ASM_CFI(".cfi_rel_offset %rip,0x70\n\t") + __ASM_CFI(".cfi_rel_offset %rsp,0x88\n\t") + __ASM_CFI(".cfi_rel_offset %rbp,0x98\n\t") /* Legends of Runeterra hooks the first system call return instruction, and * depends on us returning to it. Adjust the return address accordingly. 
*/ "subq $0xb,0x70(%rcx)\n\t" @@ -3172,6 +3189,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "leaq -0x98(%rbp),%rcx\n" "2:\n\t" #endif + __ASM_CFI(".cfi_def_cfa %rbp,0\n\t") + __ASM_CFI(".cfi_rel_offset %rax,-0x98\n\t") + __ASM_CFI(".cfi_rel_offset %rbx,-0x90\n\t") + __ASM_CFI(".cfi_rel_offset %rcx,-0x88\n\t") + __ASM_CFI(".cfi_rel_offset %rdx,-0x80\n\t") + __ASM_CFI(".cfi_rel_offset %rsi,-0x78\n\t") + __ASM_CFI(".cfi_rel_offset %rdi,-0x70\n\t") + __ASM_CFI(".cfi_rel_offset %r12,-0x48\n\t") + __ASM_CFI(".cfi_rel_offset %r13,-0x40\n\t") + __ASM_CFI(".cfi_rel_offset %r14,-0x38\n\t") + __ASM_CFI(".cfi_rel_offset %r15,-0x30\n\t") + __ASM_CFI(".cfi_rel_offset %rip,-0x28\n\t") + __ASM_CFI(".cfi_rel_offset %rsp,-0x10\n\t") + __ASM_CFI(".cfi_rel_offset %rbp,-0x00\n\t") "leaq 0x28(%rsp),%rsi\n\t" /* first argument */ "movq %rcx,%rsp\n\t" "movq 0x00(%rcx),%rax\n\t" @@ -3199,6 +3230,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq (%rbx),%r10\n\t" /* table->ServiceTable */ "callq *(%r10,%rax,8)\n\t" "leaq -0x98(%rbp),%rcx\n" + __ASM_CFI(".cfi_def_cfa %rcx,0\n\t") + __ASM_CFI(".cfi_rel_offset %rax,0x00\n\t") + __ASM_CFI(".cfi_rel_offset %rbx,0x08\n\t") + __ASM_CFI(".cfi_rel_offset %rcx,0x10\n\t") + __ASM_CFI(".cfi_rel_offset %rdx,0x18\n\t") + __ASM_CFI(".cfi_rel_offset %rsi,0x20\n\t") + __ASM_CFI(".cfi_rel_offset %rdi,0x28\n\t") + __ASM_CFI(".cfi_rel_offset %r12,0x50\n\t") + __ASM_CFI(".cfi_rel_offset %r13,0x58\n\t") + __ASM_CFI(".cfi_rel_offset %r14,0x60\n\t") + __ASM_CFI(".cfi_rel_offset %r15,0x68\n\t") + __ASM_CFI(".cfi_rel_offset %rip,0x70\n\t") + __ASM_CFI(".cfi_rel_offset %rsp,0x88\n\t") + __ASM_CFI(".cfi_rel_offset %rbp,0x98\n\t") "2:\tmovl 0x94(%rcx),%edx\n\t" /* frame->restore_flags */ #ifdef __linux__ "testl $12,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB | SYSCALL_HAVE_WRFSGSBASE */
GDB otherwise gets confused: it considers the syscall frame to be inner to the caller frame, and stops unwinding the stack early.
Signed-off-by: Rémi Bernon <rbernon@codeweavers.com> --- dlls/ntdll/unix/thread.c | 4 ++-- dlls/ntdll/unix/virtual.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c index e88a935ca31..9ccf03ceeb7 100644 --- a/dlls/ntdll/unix/thread.c +++ b/dlls/ntdll/unix/thread.c @@ -1180,7 +1180,7 @@ NTSTATUS init_thread_stack( TEB *teb, ULONG_PTR zero_bits, SIZE_T reserve_size, teb->Tib.StackBase = teb->TlsSlots[WOW64_TLS_CPURESERVED] = cpu; teb->Tib.StackLimit = stack.StackLimit; teb->DeallocationStack = stack.DeallocationStack; - thread_data->kernel_stack = stack.StackBase; + thread_data->kernel_stack = (char *)stack.DeallocationStack - kernel_stack_size; return STATUS_SUCCESS; #else /* 64-bit stack */ @@ -1201,7 +1201,7 @@ NTSTATUS init_thread_stack( TEB *teb, ULONG_PTR zero_bits, SIZE_T reserve_size, teb->Tib.StackBase = stack.StackBase; teb->Tib.StackLimit = stack.StackLimit; teb->DeallocationStack = stack.DeallocationStack; - thread_data->kernel_stack = stack.StackBase; + thread_data->kernel_stack = (char *)stack.DeallocationStack - kernel_stack_size; return STATUS_SUCCESS; }
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 984af2d4a21..eb878171093 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -3135,13 +3135,6 @@ NTSTATUS virtual_alloc_thread_stack( INITIAL_TEB *stack, ULONG_PTR zero_bits, SI VALGRIND_STACK_REGISTER( view->base, (char *)view->base + view->size ); #endif
- /* setup no access guard page */ - set_page_vprot( view->base, page_size, VPROT_COMMITTED ); - set_page_vprot( (char *)view->base + page_size, page_size, - VPROT_READ | VPROT_WRITE | VPROT_COMMITTED | VPROT_GUARD ); - mprotect_range( view->base, 2 * page_size, 0, 0 ); - VIRTUAL_DEBUG_DUMP_VIEW( view ); - if (extra_size) { struct file_view *extra_view; @@ -3149,16 +3142,25 @@ NTSTATUS virtual_alloc_thread_stack( INITIAL_TEB *stack, ULONG_PTR zero_bits, SI /* shrink the first view and create a second one for the extra size */ /* this allows the app to free the stack without freeing the thread start portion */ view->size -= extra_size; - status = create_view( &extra_view, (char *)view->base + view->size, extra_size, + view->base = (char *)view->base + extra_size; + status = create_view( &extra_view, (char *)view->base - extra_size, extra_size, VPROT_READ | VPROT_WRITE | VPROT_COMMITTED ); if (status != STATUS_SUCCESS) { view->size += extra_size; + view->base = (char *)view->base - extra_size; delete_view( view ); goto done; } }
+ /* setup no access guard page */ + set_page_vprot( view->base, page_size, VPROT_COMMITTED ); + set_page_vprot( (char *)view->base + page_size, page_size, + VPROT_READ | VPROT_WRITE | VPROT_COMMITTED | VPROT_GUARD ); + mprotect_range( view->base, 2 * page_size, 0, 0 ); + VIRTUAL_DEBUG_DUMP_VIEW( view ); + /* note: limit is lower than base since the stack grows down */ stack->OldStackBase = 0; stack->OldStackLimit = 0;