From: Hoshino Lina <lina@lina.yt> When SIGUSR1 is called while running inside __wine_syscall_dispatcher, the CPU context is not in a consistent state. To ensure that the captured context is always consistent: - If the SIGUSR1 arrives while saving the context, set a flag and defer handling it until after the context is fully saved. - If the SIGUSR1 arrives while restoring the context, rewind and retry restoring the context from the beginning. The latter relies on context restore being idempotent. This is ensured by the previous commit (the only non-idempotent path was when instrumentation callbacks are enabled). This fixes a regression introduced by 0a5f7a710365 which started leaking bogus SP values, but also generally fixes all other register value leaks due to SIGUSR1 racing with the syscall dispatcher. Fixes: 0a5f7a710365 ("ntdll: Switch to the user stack before restoring the %fs register.") Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=59333 --- dlls/ntdll/unix/signal_x86_64.c | 242 +++++++++++++++++++++++--------- 1 file changed, 177 insertions(+), 65 deletions(-) diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 3151e2af89d..7126f272b8f 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -490,11 +490,12 @@ struct syscall_frame void *syscall_cfa; /* 00a8 */ DWORD syscall_id; /* 00b0 */ DWORD restore_flags; /* 00b4 */ - DWORD align[2]; /* 00b8 */ + ULONG64 retval; /* 00b8 */ XSAVE_FORMAT xsave; /* 00c0 */ DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate; /* 02c0 */ }; +C_ASSERT( offsetof( struct syscall_frame, retval ) == 0xb8 ); C_ASSERT( offsetof( struct syscall_frame, xsave ) == 0xc0 ); C_ASSERT( offsetof( struct syscall_frame, xstate ) == 0x2c0 ); C_ASSERT( sizeof( struct syscall_frame ) == 0x300); @@ -513,6 +514,8 @@ struct amd64_thread_data DWORD fs; /* 0338 WOW TEB selector */ DWORD mxcsr; /* 033c Unix-side mxcsr register */ char syscall_dispatch; /* 0340 */ + volatile char sigusr1_blocked; /* 0341 */ + volatile char sigusr1_pending; /* 0342 */ }; C_ASSERT( sizeof(struct amd64_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); @@ -522,6 +525,8 @@ C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, ins C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, fs ) == 0x338 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, mxcsr ) == 0x33c ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, syscall_dispatch ) == 0x340 ); +C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, sigusr1_blocked ) == 0x341 ); +C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, sigusr1_pending ) == 0x342 ); static inline struct amd64_thread_data *amd64_thread_data(void) { @@ -2637,6 +2642,66 @@ static void quit_handler( int signal, siginfo_t *siginfo, void *sigcontext ) abort_thread( 0 ); } +/********************************************************************** + * usr1_inside_syscall + * + * Shared code to handle SIGUSR1 within a syscall. + */ +static void usr1_inside_syscall(struct xcontext *context) +{ + struct syscall_frame *frame = get_syscall_frame(); + ULONG64 saved_compaction = 0; + I386_CONTEXT *wow_context; + + context->c.ContextFlags = CONTEXT_FULL | CONTEXT_SEGMENTS | CONTEXT_EXCEPTION_REQUEST; + + NtGetContextThread( GetCurrentThread(), &context->c ); + if (xstate_extended_features) + { + if (user_shared_data->XState.CompactionEnabled) + frame->xstate.CompactionMask |= xstate_extended_features; + context_init_xstate( &context->c, &frame->xstate ); + saved_compaction = frame->xstate.CompactionMask; + } + wait_suspend( &context->c ); + if (xstate_extended_features) frame->xstate.CompactionMask = saved_compaction; + if (context->c.ContextFlags & 0x40) + { + /* xstate is updated directly in frame's xstate */ + context->c.ContextFlags &= ~0x40; + frame->restore_flags |= 0x40; + } + if ((wow_context = get_cpu_area( IMAGE_FILE_MACHINE_I386 )) + && (wow_context->ContextFlags & CONTEXT_I386_CONTROL) == CONTEXT_I386_CONTROL) + { + WOW64_CPURESERVED *cpu = NtCurrentTeb()->TlsSlots[WOW64_TLS_CPURESERVED]; + + cpu->Flags |= WOW64_CPURESERVED_FLAG_RESET_STATE; + } + NtSetContextThread( GetCurrentThread(), &context->c ); +} + +/********************************************************************** + * deferred_sigusr1 + * + * Deferred handler for SIGUSR1 + * + * Called after syscall entry, on the kernel stack. + */ +void deferred_sigusr1(void) +{ + struct xcontext context; + + TRACE_(seh)( "handling deferred SIGUSR1\n"); + do { + amd64_thread_data()->sigusr1_pending = 0; + /* USR1 could arrive again immediately since we are not in signal context, + * so block it. */ + amd64_thread_data()->sigusr1_blocked++; + usr1_inside_syscall(&context); + amd64_thread_data()->sigusr1_blocked--; + } while (amd64_thread_data()->sigusr1_pending); +} /********************************************************************** * usr1_handler @@ -2646,54 +2711,37 @@ static void quit_handler( int signal, siginfo_t *siginfo, void *sigcontext ) static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) { ucontext_t *ucontext = init_handler( sigcontext ); + struct syscall_frame *frame = get_syscall_frame(); + struct xcontext *context; - if (is_inside_syscall( RSP_sig(ucontext) )) + extern const void *__wine_syscall_dispatcher_save_end_ptr; + extern const void *__wine_syscall_dispatcher_return_ptr; + extern const void *__wine_syscall_dispatcher_return_end_ptr; + if (RIP_sig(ucontext) >= (ULONG_PTR)__wine_syscall_dispatcher_instrumentation && + RIP_sig(ucontext) < (ULONG_PTR)__wine_syscall_dispatcher_save_end_ptr) { - struct syscall_frame *frame = get_syscall_frame(); - ULONG64 saved_compaction = 0; - I386_CONTEXT *wow_context; - struct xcontext *context; - - context = (struct xcontext *)(((ULONG_PTR)RSP_sig(ucontext) - 128 /* red zone */ - sizeof(*context)) & ~15); - if ((char *)context < (char *)ntdll_get_thread_data()->kernel_stack) - { - ERR_(seh)( "kernel stack overflow.\n" ); - return; - } - context->c.ContextFlags = CONTEXT_FULL | CONTEXT_SEGMENTS | CONTEXT_EXCEPTION_REQUEST; - if (frame->restore_flags & RESTORE_FLAGS_INCOMPLETE_FRAME_CONTEXT) - { - frame->restore_flags &= ~RESTORE_FLAGS_INCOMPLETE_FRAME_CONTEXT; - frame->eflags = 0x200; - fixup_frame_fpu_state( frame, ucontext ); - } - NtGetContextThread( GetCurrentThread(), &context->c ); - if (xstate_extended_features) - { - if (user_shared_data->XState.CompactionEnabled) - frame->xstate.CompactionMask |= xstate_extended_features; - context_init_xstate( &context->c, &frame->xstate ); - saved_compaction = frame->xstate.CompactionMask; - } - wait_suspend( &context->c ); - if (xstate_extended_features) frame->xstate.CompactionMask = saved_compaction; - if (context->c.ContextFlags & 0x40) - { - /* xstate is updated directly in frame's xstate */ - context->c.ContextFlags &= ~0x40; - frame->restore_flags |= 0x40; - } - if ((wow_context = get_cpu_area( IMAGE_FILE_MACHINE_I386 )) - && (wow_context->ContextFlags & CONTEXT_I386_CONTROL) == CONTEXT_I386_CONTROL) - { - WOW64_CPURESERVED *cpu = NtCurrentTeb()->TlsSlots[WOW64_TLS_CPURESERVED]; - - cpu->Flags |= WOW64_CPURESERVED_FLAG_RESET_STATE; - } - NtSetContextThread( GetCurrentThread(), &context->c ); + /* Interrupted during syscall entry. Defer the USR1. */ + TRACE_(seh)( "deferring SIGUSR1 during syscall entry (rip=0x%lx)\n", (long)RIP_sig(ucontext)); + amd64_thread_data()->sigusr1_pending = 1; + return; } - else + else if (RIP_sig(ucontext) >= (ULONG_PTR)__wine_syscall_dispatcher_return_ptr && + RIP_sig(ucontext) < (ULONG_PTR)__wine_syscall_dispatcher_return_end_ptr) { + /* Interrupted during syscall exit. Rewind the exit and handle as inside syscall. */ + TRACE_(seh)( "rewinding syscall exit due to SIGUSR1 (rip=0x%lx, rax=0x%lx)\n", + (long)RIP_sig(ucontext), (long)RAX_sig(ucontext)); + RAX_sig(ucontext) = (ULONG_PTR)frame->retval; /* Restore return value */ + R13_sig(ucontext) = (ULONG_PTR)NtCurrentTeb(); + RBP_sig(ucontext) = (ULONG_PTR)&frame->rbp; + RIP_sig(ucontext) = (ULONG_PTR)__wine_syscall_dispatcher_return_ptr; + RCX_sig(ucontext) = (ULONG_PTR)frame; + RSP_sig(ucontext) = (ULONG_PTR)frame; + /* Fallthrough */ + } + else if (!is_inside_syscall( RSP_sig(ucontext) )) + { + /* Interrupted outside a syscall. */ struct xcontext context; save_context( &context, ucontext ); @@ -2701,9 +2749,33 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) if (is_wow64() && context.c.SegCs == cs64_sel) context.c.ContextFlags |= CONTEXT_EXCEPTION_ACTIVE; wait_suspend( &context.c ); restore_context( &context, ucontext ); + return; } -} + else if (amd64_thread_data()->sigusr1_blocked) + { + /* Interrupted during a syscall that is accessing user context. */ + TRACE_(seh)( "deferring SIGUSR1 during critical syscall (rip=0x%lx, blocked=%d)\n", + (long)RIP_sig(ucontext), amd64_thread_data()->sigusr1_blocked); + amd64_thread_data()->sigusr1_pending = 1; + return; + } + /* Otherwise, interrupted within a syscall with a consistent user context. */ + context = (struct xcontext *)(((ULONG_PTR)RSP_sig(ucontext) - 128 /* red zone */ - sizeof(*context)) & ~15); + + if ((char *)context < (char *)ntdll_get_thread_data()->kernel_stack) + { + ERR_(seh)( "kernel stack overflow.\n" ); + return; + } + if (frame->restore_flags & RESTORE_FLAGS_INCOMPLETE_FRAME_CONTEXT) + { + frame->restore_flags &= ~RESTORE_FLAGS_INCOMPLETE_FRAME_CONTEXT; + frame->eflags = 0x200; + fixup_frame_fpu_state( frame, ucontext ); + } + usr1_inside_syscall(context); +} #if defined(__APPLE__) || defined(PR_SET_SYSCALL_USER_DISPATCH) /********************************************************************** @@ -2979,6 +3051,8 @@ void init_syscall_frame( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, assert( thread_data->frame_size == frame_size ); thread_data->instrumentation_callback = &instrumentation_callback; + thread_data->sigusr1_blocked = 0; + thread_data->sigusr1_pending = 0; #if defined __linux__ arch_prctl( ARCH_SET_GS, teb ); @@ -3104,6 +3178,28 @@ __ASM_GLOBAL_FUNC( signal_start_thread, /*********************************************************************** * __wine_syscall_dispatcher */ + +/* The instrumentation entry point comes first. This allows the range between + * __wine_syscall_dispatcher_instrumentation and __wine_syscall_dispatcher_save_end + * to be considered "entering a syscall" in the SIGUSR1 handler. + */ +__ASM_GLOBAL_FUNC( __wine_syscall_dispatcher_instrumentation, + "movq %gs:0x378,%rcx\n\t" /* thread_data->syscall_frame */ + "popq 0x70(%rcx)\n\t" /* frame->rip */ + __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") + __ASM_CFI_REG_IS_AT2(rip, rcx, 0xf0,0x00) + "pushfq\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t") + "popq 0x80(%rcx)\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") + "movl $0x10000,0xb4(%rcx)\n\t" /* frame->restore_flags <- RESTORE_FLAGS_INSTRUMENTATION */ + /* Save %rip, into the r10 save slot, which will be + * used in the instrumentation return path. + */ + "pushq 0x70(%rcx)\n\t" /* frame->rip */ + "popq 0x40(%rcx)\n\t" /* frame->r10 */ + "jmp " __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_prolog_end") ) + __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_gs_load") ":\n\t" "movq %gs:0x378,%rcx\n\t" /* thread_data->syscall_frame */ @@ -3215,7 +3311,24 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "syscall\n\t" "leaq -0x98(%rbp),%rcx\n" #endif - "ldmxcsr 0x33c(%r13)\n\t" /* amd64_thread_data()->mxcsr */ + /* If the handler was interrupted while saving context, the + * SIGUSR1 is deferred. Handle it now. + */ + "\n" __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_save_end") ":\n\t" + "testl $1,0x342(%r13)\n\t" /* amd64_thread_data()->sigusr1_pending */ + "jz 1f\n\t" + "pushq %r8\n\t" + "pushq %r9\n\t" + "pushq %r10\n\t" + "sub $8,%rsp\n\t" /* align */ + "call " __ASM_NAME("deferred_sigusr1") "\n\t" + "add $8,%rsp\n\t" + "popq %r10\n\t" + "popq %r9\n\t" + "popq %r8\n\t" + "leaq -0x98(%rbp),%rcx\n" + + "1:\tldmxcsr 0x33c(%r13)\n\t" /* amd64_thread_data()->mxcsr */ "movl 0xb0(%rcx),%eax\n\t" /* frame->syscall_id */ "movq 0x18(%rcx),%r11\n\t" /* 2nd argument */ "movl %eax,%ebx\n\t" @@ -3249,6 +3362,16 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq 8(%r15),%r9\n\t" /* 6th argument */ "callq *%r12\n\t" "leaq -0x98(%rbp),%rcx\n\t" + + /* If the handler is interrupted between this point and + * __wine_syscall_dispatcher_return_end below, + * the syscall return will be restarted. Therefore, this + * code should be idempotent. Save the return value first, + * so it can be restored. The register state at this + * point must be restored in usr1_handler(). + */ + "movq %rax,0xb8(%rcx)\n\t" /* frame->retval */ + __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return") ":\n\t" /* push rbp-based kernel stack cfi */ __ASM_CFI(".cfi_remember_state\n\t") @@ -3371,6 +3494,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "andq $~0x4000,(%rsp)\n\t" /* make sure NT flag is not set, or iretq will fault */ "popfq\n\t" "iretq\n" + __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return_end") ":\n" /* pop rbp-based kernel stack cfi */ __ASM_CFI("\t.cfi_restore_state\n") @@ -3417,24 +3541,6 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher_return, "jnz " __ASM_LOCAL_LABEL("trace_syscall_ret" ) "\n\t" "jmp " __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return") ) -__ASM_GLOBAL_FUNC( __wine_syscall_dispatcher_instrumentation, - "movq %gs:0x378,%rcx\n\t" /* thread_data->syscall_frame */ - "popq 0x70(%rcx)\n\t" /* frame->rip */ - __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") - __ASM_CFI_REG_IS_AT2(rip, rcx, 0xf0,0x00) - "pushfq\n\t" - __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t") - "popq 0x80(%rcx)\n\t" - __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") - /* Save %rip into the r10 save slot, which will be - * used in the instrumentation return path. - */ - "pushq 0x70(%rcx)\n\t" /* frame->rip */ - "popq 0x40(%rcx)\n\t" /* frame->r10 */ - "movl $0x10000,0xb4(%rcx)\n\t" /* frame->restore_flags <- RESTORE_FLAGS_INSTRUMENTATION */ - "jmp " __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_prolog_end") ) - - /*********************************************************************** * __wine_unix_call_dispatcher */ @@ -3562,5 +3668,11 @@ __ASM_GLOBAL_POINTER( __ASM_NAME("__wine_unix_call_dispatcher_prolog_end_ptr"), __ASM_LOCAL_LABEL("__wine_unix_call_dispatcher_prolog_end") ) __ASM_GLOBAL_POINTER( __ASM_NAME("__wine_unix_call_dispatcher_gs_load_ptr"), __ASM_LOCAL_LABEL("__wine_unix_call_dispatcher_gs_load") ) +__ASM_GLOBAL_POINTER( __ASM_NAME("__wine_syscall_dispatcher_save_end_ptr"), + __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_save_end") ) +__ASM_GLOBAL_POINTER( __ASM_NAME("__wine_syscall_dispatcher_return_ptr"), + __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return") ) +__ASM_GLOBAL_POINTER( __ASM_NAME("__wine_syscall_dispatcher_return_end_ptr"), + __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return_end") ) #endif /* __x86_64__ */ -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10419