Adapted from [check_invalid_gs](https://gitlab.winehq.org/wine/wine/-/blob/d7a7cae2e2d1ad34af6dbefcb06fbef2f...) in signal_i386.c. This prevents crashing when the %gs register is manipulated in 32-bit code on "new-style" WoW64.
[test.c](https://bugs.winehq.org/attachment.cgi?id=77444) is a simple reproduction of the crash, courtesy of Fabian Maurer from the Bugzilla comments.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=57444
-- v2: ntdll: Check for invalid %gs value in 32-bit code on WoW64. ntdll: Allow get_current_teb assembly to be used on all platforms.
From: William Horvath william@horvath.blog
And move mac_thread_gsbase around. --- dlls/ntdll/unix/signal_x86_64.c | 115 ++++++++++++++++---------------- 1 file changed, 57 insertions(+), 58 deletions(-)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index caa85249896..7667e96265b 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -462,14 +462,12 @@ static inline struct amd64_thread_data *amd64_thread_data(void) return (struct amd64_thread_data *)ntdll_get_thread_data()->cpu_data; }
-#ifdef __linux__ static inline TEB *get_current_teb(void) { unsigned long rsp; __asm__( "movq %%rsp,%0" : "=r" (rsp) ); return (TEB *)(rsp & ~signal_stack_mask); } -#endif
static BOOL is_inside_syscall( const ucontext_t *sigcontext ) { @@ -1825,6 +1823,63 @@ static inline DWORD is_privileged_instr( CONTEXT *context ) }
+#ifdef __APPLE__ +/********************************************************************** + * mac_thread_gsbase + */ +static void *mac_thread_gsbase(void) +{ + struct thread_identifier_info tiinfo; + unsigned int info_count = THREAD_IDENTIFIER_INFO_COUNT; + static int gsbase_offset = -1; + + mach_port_t self = mach_thread_self(); + kern_return_t kr = thread_info(self, THREAD_IDENTIFIER_INFO, (thread_info_t) &tiinfo, &info_count); + mach_port_deallocate(mach_task_self(), self); + + if (kr == KERN_SUCCESS) return (void*)tiinfo.thread_handle; + + if (gsbase_offset < 0) + { + /* Search for the array of TLS slots within the pthread data structure. + That's what the macOS pthread implementation uses for gsbase. */ + const void* const sentinel1 = (const void*)0x2bffb6b4f11228ae; + const void* const sentinel2 = (const void*)0x0845a7ff6ab76707; + int rc; + pthread_key_t key; + const void** p = (const void**)pthread_self(); + int i; + + gsbase_offset = 0; + if ((rc = pthread_key_create(&key, NULL))) return NULL; + + pthread_setspecific(key, sentinel1); + + for (i = key + 1; i < 2000; i++) /* arbitrary limit */ + { + if (p[i] == sentinel1) + { + pthread_setspecific(key, sentinel2); + + if (p[i] == sentinel2) + { + gsbase_offset = (i - key) * sizeof(*p); + break; + } + + pthread_setspecific(key, sentinel1); + } + } + + pthread_key_delete(key); + } + + if (gsbase_offset) return (char*)pthread_self() + gsbase_offset; + return NULL; +} +#endif + + /*********************************************************************** * handle_interrupt * @@ -2420,62 +2475,6 @@ void signal_free_thread( TEB *teb ) } }
-#ifdef __APPLE__ -/********************************************************************** - * mac_thread_gsbase - */ -static void *mac_thread_gsbase(void) -{ - struct thread_identifier_info tiinfo; - unsigned int info_count = THREAD_IDENTIFIER_INFO_COUNT; - static int gsbase_offset = -1; - - mach_port_t self = mach_thread_self(); - kern_return_t kr = thread_info(self, THREAD_IDENTIFIER_INFO, (thread_info_t) &tiinfo, &info_count); - mach_port_deallocate(mach_task_self(), self); - - if (kr == KERN_SUCCESS) return (void*)tiinfo.thread_handle; - - if (gsbase_offset < 0) - { - /* Search for the array of TLS slots within the pthread data structure. - That's what the macOS pthread implementation uses for gsbase. */ - const void* const sentinel1 = (const void*)0x2bffb6b4f11228ae; - const void* const sentinel2 = (const void*)0x0845a7ff6ab76707; - int rc; - pthread_key_t key; - const void** p = (const void**)pthread_self(); - int i; - - gsbase_offset = 0; - if ((rc = pthread_key_create(&key, NULL))) return NULL; - - pthread_setspecific(key, sentinel1); - - for (i = key + 1; i < 2000; i++) /* arbitrary limit */ - { - if (p[i] == sentinel1) - { - pthread_setspecific(key, sentinel2); - - if (p[i] == sentinel2) - { - gsbase_offset = (i - key) * sizeof(*p); - break; - } - - pthread_setspecific(key, sentinel1); - } - } - - pthread_key_delete(key); - } - - if (gsbase_offset) return (char*)pthread_self() + gsbase_offset; - return NULL; -} -#endif -
/********************************************************************** * signal_init_process
From: William Horvath william@horvath.blog
Adapted from check_invalid_gs in signal_i386.c. This prevents crashing when the %gs register is manipulated in 32-bit code on "new-style" WoW64.
The SYSCALL_HAVE_WRFSGSBASE fast-path for getting/setting the segment registers on Linux is already checked in signal_init_process, and the other platforms' implementations follow existing conventions.
Also note that we check in the TRAP_x86_PAGEFLT case because the error occurs when the invalid %gs value is actually used, i.e. by an internal call to NtCurrentTeb().
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=57444 --- dlls/ntdll/unix/signal_x86_64.c | 67 +++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 7667e96265b..51f4d12d1dc 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2022,6 +2022,68 @@ static BOOL handle_syscall_trap( ucontext_t *sigcontext, siginfo_t *siginfo ) }
+/*********************************************************************** + * check_invalid_gs + * + * Check for fault caused by invalid %gs value (some copy protection schemes mess with it). + * + */ +static inline BOOL check_invalid_gs( ucontext_t *sigcontext, CONTEXT *context ) +{ + const BYTE *instr = (BYTE *)context->Rip; + WORD system_gs = ds64_sel; + ULONG_PTR cur_gs = 0; + TEB *teb; + +#ifndef __APPLE__ /* FIXME: why does this break on macOS? */ + /* only handle faults in system libraries */ + if (virtual_is_valid_code_address( instr, 1 )) return FALSE; +#endif + + teb = get_current_teb(); + +#ifdef __linux__ + if (syscall_flags & SYSCALL_HAVE_WRFSGSBASE) + __asm__ volatile ("rdgsbase %0" : "=r" (cur_gs)); + else + cur_gs = arch_prctl( ARCH_GET_GS, teb ); +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + amd64_get_gsbase( &cur_gs ); +#elif defined(__NetBSD__) + sysarch( X86_64_GET_GSBASE, &cur_gs ); +#elif defined(__APPLE__) + cur_gs = mac_thread_gsbase(); +#else +# error Please define getting %gs for your architecture +#endif + + if ((WORD)cur_gs == system_gs) return FALSE; + + TRACE( "%04lx/%04x at %p, fixing up\n", (WORD)cur_gs, system_gs, instr ); + +#ifdef __linux__ + if (syscall_flags & SYSCALL_HAVE_WRFSGSBASE) + __asm__ volatile ("wrgsbase %0" :: "r" (teb)); + else + arch_prctl( ARCH_SET_GS, teb ); +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + amd64_set_gsbase( teb ); +#elif defined(__NetBSD__) + sysarch( X86_64_SET_GSBASE, &teb ); +#elif defined(__APPLE__) + __asm__ volatile ("movq %0,%%gs:%c1" :: "r" (teb->Tib.Self), + "n" (FIELD_OFFSET(TEB, Tib.Self))); + __asm__ volatile ("movq %0,%%gs:%c1" :: "r" (teb->ThreadLocalStoragePointer), + "n" (FIELD_OFFSET(TEB, ThreadLocalStoragePointer))); +#else +# error Please define setting %gs for your architecture +#endif + + GS_sig(sigcontext) = system_gs; + return TRUE; +} + + /********************************************************************** * segv_handler * @@ -2080,6 +2142,11 @@ static void segv_handler( int signal, siginfo_t *siginfo, void *sigcontext ) /* send EXCEPTION_EXECUTE_FAULT only if data execution prevention is enabled */ if (!(flags & MEM_EXECUTE_OPTION_DISABLE)) rec.ExceptionInformation[0] = EXCEPTION_READ_FAULT; } + if (is_wow64() && check_invalid_gs( sigcontext, &context.c )) + { + leave_handler( ucontext ); + return; + } break; case TRAP_x86_ALIGNFLT: /* Alignment check exception */ if (EFL_sig(ucontext) & 0x00040000)
On Sun Dec 22 12:25:44 2024 +0000, William Horvath wrote:
I'll have to look into the macos build failure and test it there as well.
Some testing on macOS 13 showed that the check in `virtual_is_valid_code_address( instr, 1 )` leads to an early exit that breaks the fix, because the view->protect flags are missing `VPROT_SYSTEM`. I'm not sure what's causing this difference.
For the minimal test program, simply removing that `virtual_is_valid_code_address` check allows it to work, but that doesn't seem like a robust solution.