From: Brendan Shanks bshanks@codeweavers.com
--- dlls/ntdll/loader.c | 11 --- dlls/ntdll/unix/signal_x86_64.c | 115 +++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 19 deletions(-)
diff --git a/dlls/ntdll/loader.c b/dlls/ntdll/loader.c index 38516115b38..5267dec6647 100644 --- a/dlls/ntdll/loader.c +++ b/dlls/ntdll/loader.c @@ -1403,9 +1403,6 @@ static BOOL alloc_tls_slot( LDR_DATA_TABLE_ENTRY *mod ) if (!new) return FALSE; if (old) memcpy( new, old, old_module_count * sizeof(*new) ); teb->ThreadLocalStoragePointer = new; -#ifdef __x86_64__ /* macOS-specific hack */ - if (teb->Instrumentation[0]) ((TEB *)teb->Instrumentation[0])->ThreadLocalStoragePointer = new; -#endif TRACE( "thread %04lx tls block %p -> %p\n", HandleToULong(teb->ClientId.UniqueThread), old, new ); /* FIXME: can't free old block here, should be freed at thread exit */ } @@ -1657,10 +1654,6 @@ static NTSTATUS alloc_thread_tls(void) TRACE( "slot %u: %u/%lu bytes at %p\n", i, size, dir->SizeOfZeroFill, pointers[i] ); } NtCurrentTeb()->ThreadLocalStoragePointer = pointers; -#ifdef __x86_64__ /* macOS-specific hack */ - if (NtCurrentTeb()->Instrumentation[0]) - ((TEB *)NtCurrentTeb()->Instrumentation[0])->ThreadLocalStoragePointer = pointers; -#endif return STATUS_SUCCESS; }
@@ -3947,10 +3940,6 @@ void WINAPI LdrShutdownThread(void) if ((pointers = NtCurrentTeb()->ThreadLocalStoragePointer)) { NtCurrentTeb()->ThreadLocalStoragePointer = NULL; -#ifdef __x86_64__ /* macOS-specific hack */ - if (NtCurrentTeb()->Instrumentation[0]) - ((TEB *)NtCurrentTeb()->Instrumentation[0])->ThreadLocalStoragePointer = NULL; -#endif for (i = 0; i < tls_module_count; i++) RtlFreeHeap( GetProcessHeap(), 0, pointers[i] ); RtlFreeHeap( GetProcessHeap(), 0, pointers ); } diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 744273a2b56..e90e2042275 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -63,6 +63,7 @@ #endif #ifdef __APPLE__ # include <mach/mach.h> +extern void _thread_set_tsd_base(uint64_t); #endif
#include "ntstatus.h" @@ -423,7 +424,7 @@ struct syscall_frame void *syscall_cfa; /* 00a8 */ DWORD syscall_flags; /* 00b0 */ DWORD restore_flags; /* 00b4 */ - DWORD align[2]; /* 00b8 */ + ULONG64 gsbase; /* 00b8 */ XMM_SAVE_AREA32 xsave; /* 00c0 */ DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate; /* 02c0 */ }; @@ -462,7 +463,7 @@ static inline struct amd64_thread_data *amd64_thread_data(void) return (struct amd64_thread_data *)ntdll_get_thread_data()->cpu_data; }
-#ifdef __linux__ +#if defined(__linux__) || defined(__APPLE__) static inline TEB *get_current_teb(void) { unsigned long rsp; @@ -477,6 +478,15 @@ static BOOL is_inside_syscall( const ucontext_t *sigcontext ) (char *)RSP_sig(sigcontext) <= (char *)amd64_thread_data()->syscall_frame); }
+#ifdef __APPLE__ +static inline UINT64 get_gs30(void) +{ + UINT64 gs30; + __asm__("movq %%gs:0x30,%0" : "=r" (gs30)); + return gs30; +} +#endif +
extern void __wine_syscall_dispatcher_instrumentation(void); static void *instrumentation_callback; @@ -846,6 +856,10 @@ static inline ucontext_t *init_handler( void *sigcontext ) struct ntdll_thread_data *thread_data = (struct ntdll_thread_data *)&get_current_teb()->GdiTebBatch; arch_prctl( ARCH_SET_FS, ((struct amd64_thread_data *)thread_data->cpu_data)->pthread_teb ); } +#endif +#ifdef __APPLE__ + struct ntdll_thread_data *thread_data = (struct ntdll_thread_data *)&get_current_teb()->GdiTebBatch; + _thread_set_tsd_base( (uint64_t)((struct amd64_thread_data *)thread_data->cpu_data)->pthread_teb ); #endif return sigcontext; } @@ -860,6 +874,9 @@ static inline void leave_handler( ucontext_t *sigcontext ) if (fs32_sel && !is_inside_signal_stack( (void *)RSP_sig(sigcontext )) && !is_inside_syscall(sigcontext)) __asm__ volatile( "movw %0,%%fs" :: "r" (fs32_sel) ); #endif +#ifdef __APPLE__ + _thread_set_tsd_base( (uint64_t)NtCurrentTeb() ); +#endif #ifdef DS_sig DS_sig(sigcontext) = ds64_sel; #else @@ -1644,6 +1661,12 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "test %r10,%r10\n\t" "jz 1f\n\t" "xchgq %rcx,%r10\n\t" +#ifdef __APPLE__ + "1\t:pushq %rcx\n\t" + "movq %r8,%rdi\n\t" + "call " __ASM_NAME("_thread_set_tsd_base") "\n\t" + "popq %rcx\n\t" +#endif "1\t:jmpq *%rcx" ) /* func */
@@ -1653,6 +1676,16 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, extern void DECLSPEC_NORETURN user_mode_callback_return( void *ret_ptr, ULONG ret_len, NTSTATUS status, TEB *teb ); __ASM_GLOBAL_FUNC( user_mode_callback_return, +#ifdef __APPLE__ + "pushq %rcx\n\t" + "pushq %rdi\n\t" + "pushq %rsi\n\t" + "movq 0x320(%rcx),%rdi\n\t" /* amd64_thread_data()->pthread_teb */ + "call " __ASM_NAME("_thread_set_tsd_base") "\n\t" + "popq %rsi\n\t" + "popq %rdi\n\t" + "popq %rcx\n\t" +#endif "movq 0x328(%rcx),%r10\n\t" /* amd64_thread_data()->syscall_frame */ "movq 0xa0(%r10),%r11\n\t" /* frame->prev_frame */ "movq %r11,0x328(%rcx)\n\t" /* amd64_thread_data()->syscall_frame = prev_frame */ @@ -2194,6 +2227,9 @@ static void quit_handler( int signal, siginfo_t *siginfo, void *sigcontext ) */ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) { +#ifdef __APPLE__ + UINT64 gs30 = get_gs30(); +#endif ucontext_t *ucontext = init_handler( sigcontext );
if (is_inside_syscall( ucontext )) @@ -2225,6 +2261,14 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) frame->restore_flags |= 0x40; } NtSetContextThread( GetCurrentThread(), &context->c ); +#ifdef __APPLE__ + /* GSBASE needs to be reset to its value at the entry of this function, + * which could be either the TEB or Mac TSD. + * Check %gs:0x30, which is Self in the TEB and reserved (0) in the TSD. + */ + if (gs30) + _thread_set_tsd_base( (uint64_t)NtCurrentTeb() ); +#endif } else { @@ -2559,13 +2603,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB #elif defined(__NetBSD__) sysarch( X86_64_SET_GSBASE, &teb ); #elif defined (__APPLE__) - __asm__ volatile (".byte 0x65\n\tmovq %0,%c1" :: "r" (teb->Tib.Self), "n" (FIELD_OFFSET(TEB, Tib.Self))); - __asm__ volatile (".byte 0x65\n\tmovq %0,%c1" :: "r" (teb->ThreadLocalStoragePointer), "n" (FIELD_OFFSET(TEB, ThreadLocalStoragePointer))); thread_data->pthread_teb = mac_thread_gsbase(); - /* alloc_tls_slot() needs to poke a value to an address relative to each - thread's gsbase. Have each thread record its gsbase pointer into its - TEB so alloc_tls_slot() can find it. */ - teb->Instrumentation[0] = thread_data->pthread_teb; #else # error Please define setting %gs for your architecture #endif @@ -2622,6 +2660,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB frame->restore_flags |= CONTEXT_INTEGER; frame->syscall_flags = syscall_flags; frame->syscall_cfa = syscall_cfa; + frame->gsbase = (ULONG64)teb; if ((callback = instrumentation_callback)) { frame->r10 = frame->rip; @@ -2709,6 +2748,10 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movw %ss,0x90(%rcx)\n\t" "movq %rbp,0x98(%rcx)\n\t" __ASM_CFI_REG_IS_AT2(rbp, rcx, 0x98, 0x01) +#ifdef __APPLE__ + "movq %gs:0x30,%r14\n\t" + "movq %r14,0xb8(%rcx)\n\t" /* frame->gsbase */ +#endif /* Legends of Runeterra hooks the first system call return instruction, and * depends on us returning to it. Adjust the return address accordingly. */ "subq $0xb,0x70(%rcx)\n\t" @@ -2797,6 +2840,22 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "andl $0xfff,%eax\n\t" /* syscall number */ "cmpq 16(%rbx),%rax\n\t" /* table->ServiceLimit */ "jae 5f\n\t" +#ifdef __APPLE__ + "pushq %rax\n\t" + "pushq %rcx\n\t" + "pushq %rdi\n\t" + "pushq %rdx\n\t" + "pushq %rsi\n\t" + "pushq %r11\n\t" + "movq %gs:0x320,%rdi\n\t" /* amd64_thread_data()->pthread_teb */ + "call " __ASM_NAME("_thread_set_tsd_base") "\n\t" + "popq %r11\n\t" + "popq %rsi\n\t" + "popq %rdx\n\t" + "popq %rdi\n\t" + "popq %rcx\n\t" + "popq %rax\n\t" +#endif "movq 24(%rbx),%rcx\n\t" /* table->ArgumentTable */ "movzbl (%rcx,%rax),%ecx\n\t" "subq $0x30,%rcx\n\t" @@ -2818,6 +2877,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "callq *(%r10,%rax,8)\n\t" "leaq -0x98(%rbp),%rcx\n\t" __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return") ":\n\t" +#ifdef __APPLE__ + "pushq %rax\n\t" + "pushq %rcx\n\t" + "pushq %rdi\n\t" + "pushq %rdx\n\t" + "pushq %rsi\n\t" + "movq 0xb8(%rcx),%rdi\n\t" /* frame->gsbase */ + "call " __ASM_NAME("_thread_set_tsd_base") "\n\t" + "popq %rsi\n\t" + "popq %rdx\n\t" + "popq %rdi\n\t" + "popq %rcx\n\t" + "popq %rax\n\t" +#endif "movl 0xb4(%rcx),%edx\n\t" /* frame->restore_flags */ "testl $0x48,%edx\n\t" /* CONTEXT_FLOATING_POINT | CONTEXT_XSTATE */ "jnz 2f\n\t" @@ -2992,6 +3065,10 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, __ASM_CFI_CFA_IS_AT2(rcx, 0x88, 0x01) "movq %rbp,0x98(%rcx)\n\t" __ASM_CFI_REG_IS_AT2(rbp, rcx, 0x98, 0x01) +#ifdef __APPLE__ + "movq %gs:0x30,%r14\n\t" + "movq %r14,0xb8(%rcx)\n\t" /* frame->gsbase */ +#endif "movdqa %xmm6,0x1c0(%rcx)\n\t" "movdqa %xmm7,0x1d0(%rcx)\n\t" "movdqa %xmm8,0x1e0(%rcx)\n\t" @@ -3029,6 +3106,18 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, "mov $158,%eax\n\t" /* SYS_arch_prctl */ "syscall\n\t" "2:\n\t" +#endif +#ifdef __APPLE__ + "pushq %rax\n\t" + "pushq %rcx\n\t" + "pushq %rdx\n\t" + "pushq %rsi\n\t" + "movq %gs:0x320,%rdi\n\t" /* amd64_thread_data()->pthread_teb */ + "call " __ASM_NAME("_thread_set_tsd_base") "\n\t" + "popq %rsi\n\t" + "popq %rdx\n\t" + "popq %rcx\n\t" + "popq %rax\n\t" #endif "movq %r8,%rdi\n\t" /* args */ "callq *(%r10,%rdx,8)\n\t" @@ -3053,6 +3142,16 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, "jz 1f\n\t" "movw %gs:0x338,%fs\n" /* amd64_thread_data()->fs */ "1:\n\t" +#endif +#ifdef __APPLE__ + "pushq %rax\n\t" + "pushq %rcx\n\t" + "pushq %rdx\n\t" + "movq 0xb8(%rcx),%rdi\n\t" /* frame->gsbase */ + "call " __ASM_NAME("_thread_set_tsd_base") "\n\t" + "popq %rdx\n\t" + "popq %rcx\n\t" + "popq %rax\n\t" #endif "movq 0x60(%rcx),%r14\n\t" "movq 0x28(%rcx),%rdi\n\t"