From: Jinoh Kang <jinoh.kang.kr(a)gmail.com>

Today, NtContinue() on ARM64 does not restore X16 and X17 from the context. This is because X16 and X17 are used as scratch registers for restoring PC and SP, respectively, in __wine_syscall_dispatcher. Scratch registers are required because ARMv8 does not have an unprivileged (EL0) instruction that loads SP and PC from memory or non-GPR architectural state.

Fix this by making the ARM64 __wine_syscall_dispatcher perform a full context restore via raise(SIGUSR2) when NtContinue() is used.

Since raising a signal is quite expensive, it should be done only when necessary. To achieve this, split the ARM64 syscall dispatcher's return behaviour into a fast path (that does not involve signals) and a slow path (that involves signals):

- If CONTEXT_INTEGER is not set, the dispatcher takes the fast path: the X16 and X17 registers are clobbered as usual.
- If X16 == PC and X17 == SP, the dispatcher also takes the fast path: it can safely use X16 and X17 without corrupting the register values, since those two registers already have the desired values. This fast path is used in call_user_apc_dispatcher(), call_user_exception_dispatcher(), and call_init_thunk().
- Otherwise, the dispatcher takes the slow path: it raises SIGUSR2 and performs a full context restore in the signal handler.
Fixes: 88e336214db94318b6657d641919fcce6be4a328 --- dlls/ntdll/unix/signal_arm64.c | 38 ++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/dlls/ntdll/unix/signal_arm64.c b/dlls/ntdll/unix/signal_arm64.c index 7f998f894d4..a0adf9666c8 100644 --- a/dlls/ntdll/unix/signal_arm64.c +++ b/dlls/ntdll/unix/signal_arm64.c @@ -477,6 +477,21 @@ NTSTATUS unwind_builtin_dll( void *args ) } +/*********************************************************************** + * syscall_frame_fixup_for_fastpath + * + * Fixes up the given syscall frame such that the syscall dispatcher + * can return via the fast path if CONTEXT_INTEGER is set in + * restore_flags. + * + * Clobbers the frame's X16 and X17 register values. + */ +static void syscall_frame_fixup_for_fastpath( struct syscall_frame *frame ) +{ + frame->x[16] = frame->pc; + frame->x[17] = frame->sp; +} + /*********************************************************************** * save_fpu * @@ -1054,6 +1069,7 @@ NTSTATUS call_user_apc_dispatcher( CONTEXT *context, ULONG_PTR arg1, ULONG_PTR a frame->x[3] = arg3; frame->x[4] = (ULONG64)func; frame->restore_flags |= CONTEXT_CONTROL | CONTEXT_INTEGER; + syscall_frame_fixup_for_fastpath( frame ); return status; } @@ -1086,6 +1102,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context frame->lr = lr; frame->sp = sp; frame->restore_flags |= CONTEXT_INTEGER | CONTEXT_CONTROL; + syscall_frame_fixup_for_fastpath( frame ); return status; } @@ -1578,6 +1595,14 @@ void signal_init_process(void) } +/*********************************************************************** + * syscall_dispatcher_return_slowpath + */ +void DECLSPEC_HIDDEN syscall_dispatcher_return_slowpath(void) +{ + raise( SIGUSR2 ); +} + /*********************************************************************** * call_init_thunk */ @@ -1638,6 +1663,7 @@ void DECLSPEC_HIDDEN call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, B frame->x[18] = 
(ULONG64)teb; frame->prev_frame = NULL; frame->restore_flags |= CONTEXT_INTEGER; + syscall_frame_fixup_for_fastpath( frame ); frame->syscall_table = KeServiceDescriptorTable; pthread_sigmask( SIG_UNBLOCK, &server_block_set, NULL ); @@ -1734,13 +1760,21 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "blr x16\n\t" "mov sp, x22\n" __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return") ":\n\t" - "ldp x18, x19, [sp, #0x90]\n\t" + "ldr w16, [sp, #0x10c]\n\t" /* frame->restore_flags */ + "tbz x16, #1, 1f\n\t" /* CONTEXT_INTEGER */ + "ldp x12, x13, [sp, #0x80]\n\t" /* frame->x[16..17] */ + "ldp x14, x15, [sp, #0xf8]\n\t" /* frame->sp, frame->pc */ + "eor x12, x12, x15\n\t" /* frame->x16 == frame->pc? */ + "eor x13, x13, x14\n\t" /* frame->x17 == frame->sp? */ + "orr x12, x12, x13\n\t" + "cbz x12, 1f\n\t" /* take slowpath if unequal */ + "bl " __ASM_NAME("syscall_dispatcher_return_slowpath") "\n" + "1:\tldp x18, x19, [sp, #0x90]\n\t" "ldp x20, x21, [sp, #0xa0]\n\t" "ldp x22, x23, [sp, #0xb0]\n\t" "ldp x24, x25, [sp, #0xc0]\n\t" "ldp x26, x27, [sp, #0xd0]\n\t" "ldp x28, x29, [sp, #0xe0]\n\t" - "ldr w16, [sp, #0x10c]\n\t" /* frame->restore_flags */ "tbz x16, #2, 1f\n\t" /* CONTEXT_FLOATING_POINT */ "ldp q0, q1, [sp, #0x130]\n\t" "ldp q2, q3, [sp, #0x150]\n\t" -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/3341