Synchronize syscall_frame to match the actual state on syscall return.
This is required for a follow-up patch that addresses incorrect behaviour of __wine_syscall_dispatcher, specifically unconditional clobbering of X16 and X17 registers, while minimizing the performance impact. (Hence the "fastpath")
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- dlls/ntdll/unix/signal_arm64.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/dlls/ntdll/unix/signal_arm64.c b/dlls/ntdll/unix/signal_arm64.c index fa402a7a83e..da5739b6f1c 100644 --- a/dlls/ntdll/unix/signal_arm64.c +++ b/dlls/ntdll/unix/signal_arm64.c @@ -324,6 +324,17 @@ NTSTATUS CDECL unwind_builtin_dll( ULONG type, DISPATCHER_CONTEXT *dispatch, CON }
+/*********************************************************************** + * syscall_frame_fixup_for_fastpath + * + * Clobbers the frame's X16 and X17 register values. + */ +static void syscall_frame_fixup_for_fastpath( struct syscall_frame *frame ) +{ + frame->x[16] = frame->pc; + frame->x[17] = frame->sp; +} + /*********************************************************************** * save_context * @@ -686,6 +697,7 @@ NTSTATUS call_user_apc_dispatcher( CONTEXT *context, ULONG_PTR arg1, ULONG_PTR a frame->x[3] = arg3; frame->x[4] = (ULONG64)func; frame->restore_flags |= CONTEXT_CONTROL | CONTEXT_INTEGER; + syscall_frame_fixup_for_fastpath( frame ); return status; }
@@ -718,6 +730,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context frame->lr = lr; frame->sp = sp; frame->restore_flags |= CONTEXT_INTEGER | CONTEXT_CONTROL; + syscall_frame_fixup_for_fastpath( frame ); return status; }
@@ -757,6 +770,7 @@ NTSTATUS WINAPI KeUserModeCallback( ULONG id, const void *args, ULONG len, void callback_frame.frame.restore_flags = CONTEXT_INTEGER; callback_frame.frame.syscall_table = frame->syscall_table; callback_frame.frame.prev_frame = frame; + syscall_frame_fixup_for_fastpath( &callback_frame.frame ); arm64_thread_data()->syscall_frame = &callback_frame.frame;
__wine_syscall_dispatcher_return( &callback_frame.frame, 0 ); @@ -1187,6 +1201,7 @@ void DECLSPEC_HIDDEN call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, B frame->prev_frame = NULL; frame->restore_flags |= CONTEXT_INTEGER; frame->syscall_table = KeServiceDescriptorTable; + syscall_frame_fixup_for_fastpath( frame );
pthread_sigmask( SIG_UNBLOCK, &server_block_set, NULL ); __wine_syscall_dispatcher_return( frame, 0 );
Today, __wine_syscall_dispatcher clobbers X16 and X17 registers even if `syscall_frame->restore_flags` has CONTEXT_INTEGER set. This is because they are used as scratch registers for restoring SP and PC, which cannot be loaded directly from memory or other architectural state.
signal_set_full_context bypasses this clobbering behaviour by relying on kernel instead to fully restore the context. Specifically, it raises SIGUSR2, of which handler moves everything to the sigcontext and returns implicitly via the sigreturn() Unix system call.
However, this approach is not generic and also error-prone: the signal_set_full_context implementation now differs from other architectures by abruptly switching to the specified context instead of taking the proper path of returning via the syscall dispatcher.
Fix this by splitting the ARM64 syscall dispatcher's returning behaviour into a fast path and a slow path.
- If CONTEXT_INTEGER is not set, the dispatcher takes the fast path: the X16 and X17 registers are clobbered as usual.
- If X16 == PC and X17 == SP, the dispatcher also takes the fast path: it can safely use X16 and X17 without corrupting the thread state, since those two registers already have the desired values.
- Otherwise, the dispatcher takes the slow path: it raises SIGUSR2 and does full context restore in the signal handler.
Also, modify ARM64 signal_set_full_context to match that of other architectures. The function will let the dispatcher choose either path as appropriate.
Performance impact is expected to be negligible (if any) when CONTEXT_INTEGER is unset (the usual case), since the number of instructions executed is left unchanged for this case.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- dlls/ntdll/unix/signal_arm64.c | 57 ++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 17 deletions(-)
diff --git a/dlls/ntdll/unix/signal_arm64.c b/dlls/ntdll/unix/signal_arm64.c index da5739b6f1c..c5db2e7de64 100644 --- a/dlls/ntdll/unix/signal_arm64.c +++ b/dlls/ntdll/unix/signal_arm64.c @@ -327,6 +327,10 @@ NTSTATUS CDECL unwind_builtin_dll( ULONG type, DISPATCHER_CONTEXT *dispatch, CON /*********************************************************************** * syscall_frame_fixup_for_fastpath * + * Fixes up the given syscall frame such that the syscall dispatcher + * can return via the fast path if CONTEXT_INTEGER is set in + * restore_flags. + * * Clobbers the frame's X16 and X17 register values. */ static void syscall_frame_fixup_for_fastpath( struct syscall_frame *frame ) @@ -426,7 +430,8 @@ NTSTATUS signal_set_full_context( CONTEXT *context ) { NTSTATUS status = NtSetContextThread( GetCurrentThread(), context );
- if (!status && (context->ContextFlags & CONTEXT_INTEGER) == CONTEXT_INTEGER) raise( SIGUSR2 ); + if (!status && (context->ContextFlags & CONTEXT_INTEGER) == CONTEXT_INTEGER) + arm64_thread_data()->syscall_frame->restore_flags |= CONTEXT_INTEGER; return status; }
@@ -1172,6 +1177,17 @@ void signal_init_process(void) }
+/*********************************************************************** + * syscall_dispatcher_return_slowpath + */ +void DECLSPEC_HIDDEN syscall_dispatcher_return_slowpath( void ) +{ + raise( SIGUSR2 ); + + /* should not be reached */ + abort_thread( 0 ); +} + /*********************************************************************** * call_init_thunk */ @@ -1312,13 +1328,28 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "ldr x16, [x16, x20, lsl 3]\n\t" "blr x16\n\t" "mov sp, x22\n" - "3:\tldp x18, x19, [sp, #0x90]\n\t" + "3:\tldr w16, [sp, #0x10c]\n\t" /* frame->restore_flags */ + "tbz x16, #1, 1f\n\t" /* CONTEXT_INTEGER */ + "ldp x6, x7, [sp, #0x80]\n\t" /* frame->x[16..17] */ + "ldp x8, x9, [sp, #0xf8]\n\t" /* frame->sp, frame->pc */ + "eor x6, x6, x9\n\t" /* frame->x16 == frame->pc? */ + "eor x7, x7, x8\n\t" /* frame->x17 == frame->sp? */ + "orr x6, x6, x7\n\t" + "cbnz x6, 5f\n\t" /* take slowpath if unequal */ + "ldp x0, x1, [sp, #0x00]\n\t" + "ldp x2, x3, [sp, #0x10]\n\t" + "ldp x4, x5, [sp, #0x20]\n\t" + "ldp x6, x7, [sp, #0x30]\n\t" + "ldp x8, x9, [sp, #0x40]\n\t" + "ldp x10, x11, [sp, #0x50]\n\t" + "ldp x12, x13, [sp, #0x60]\n\t" + "ldp x14, x15, [sp, #0x70]\n" + "1:\tldp x18, x19, [sp, #0x90]\n\t" "ldp x20, x21, [sp, #0xa0]\n\t" "ldp x22, x23, [sp, #0xb0]\n\t" "ldp x24, x25, [sp, #0xc0]\n\t" "ldp x26, x27, [sp, #0xd0]\n\t" "ldp x28, x29, [sp, #0xe0]\n\t" - "ldr w16, [sp, #0x10c]\n\t" /* frame->restore_flags */ "tbz x16, #2, 1f\n\t" /* CONTEXT_FLOATING_POINT */ "ldp q0, q1, [sp, #0x130]\n\t" "ldp q2, q3, [sp, #0x150]\n\t" @@ -1336,19 +1367,10 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "ldp q26, q27, [sp, #0x2d0]\n\t" "ldp q28, q29, [sp, #0x2f0]\n\t" "ldp q30, q31, [sp, #0x310]\n\t" - "ldr w9, [sp, #0x128]\n\t" - "msr FPCR, x9\n\t" - "ldr w9, [sp, #0x12c]\n\t" - "msr FPSR, x9\n" - "1:\ttbz x16, #1, 1f\n\t" /* CONTEXT_INTEGER */ - "ldp x0, x1, [sp, #0x00]\n\t" - "ldp x2, x3, [sp, #0x10]\n\t" - "ldp x4, x5, [sp, #0x20]\n\t" - "ldp x6, x7, [sp, #0x30]\n\t" - "ldp x8, x9, [sp, #0x40]\n\t" - "ldp x10, x11, [sp, #0x50]\n\t" - "ldp x12, x13, [sp, #0x60]\n\t" - "ldp x14, x15, [sp, #0x70]\n" + "ldr w17, [sp, #0x128]\n\t" + "msr FPCR, x17\n\t" + "ldr w17, [sp, #0x12c]\n\t" + "msr FPSR, x17\n" "1:\tldp x16, x17, [sp, #0x100]\n\t" "msr NZCV, x17\n\t" "ldp x30, x17, [sp, #0xf0]\n\t" @@ -1360,7 +1382,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_NAME("__wine_syscall_dispatcher_return") ":\n\t" "mov sp, x0\n\t" "mov x0, x1\n\t" - "b 3b" ) + "b 3b\n" + "5:\tbl " __ASM_NAME("syscall_dispatcher_return_slowpath") )
/***********************************************************************