Adapted from [check_invalid_gs](https://gitlab.winehq.org/wine/wine/-/blob/d7a7cae2e2d1ad34af6dbefcb06fbef2f...) in signal_i386.c. This prevents crashing when the %gs register is manipulated in 32-bit code on "new-style" WoW64.
[test.c](https://bugs.winehq.org/attachment.cgi?id=77444) is a simple reproduction of the crash, courtesy of Fabian Maurer from the Bugzilla comments.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=57444
-- v3: ntdll: Check for invalid %gs value in 32-bit code on WoW64.
From: William Horvath william@horvath.blog
And move mac_thread_gsbase around. --- dlls/ntdll/unix/signal_x86_64.c | 115 ++++++++++++++++---------------- 1 file changed, 57 insertions(+), 58 deletions(-)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index caa85249896..7667e96265b 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -462,14 +462,12 @@ static inline struct amd64_thread_data *amd64_thread_data(void) return (struct amd64_thread_data *)ntdll_get_thread_data()->cpu_data; }
-#ifdef __linux__ static inline TEB *get_current_teb(void) { unsigned long rsp; __asm__( "movq %%rsp,%0" : "=r" (rsp) ); return (TEB *)(rsp & ~signal_stack_mask); } -#endif
static BOOL is_inside_syscall( const ucontext_t *sigcontext ) { @@ -1825,6 +1823,63 @@ static inline DWORD is_privileged_instr( CONTEXT *context ) }
+#ifdef __APPLE__ +/********************************************************************** + * mac_thread_gsbase + */ +static void *mac_thread_gsbase(void) +{ + struct thread_identifier_info tiinfo; + unsigned int info_count = THREAD_IDENTIFIER_INFO_COUNT; + static int gsbase_offset = -1; + + mach_port_t self = mach_thread_self(); + kern_return_t kr = thread_info(self, THREAD_IDENTIFIER_INFO, (thread_info_t) &tiinfo, &info_count); + mach_port_deallocate(mach_task_self(), self); + + if (kr == KERN_SUCCESS) return (void*)tiinfo.thread_handle; + + if (gsbase_offset < 0) + { + /* Search for the array of TLS slots within the pthread data structure. + That's what the macOS pthread implementation uses for gsbase. */ + const void* const sentinel1 = (const void*)0x2bffb6b4f11228ae; + const void* const sentinel2 = (const void*)0x0845a7ff6ab76707; + int rc; + pthread_key_t key; + const void** p = (const void**)pthread_self(); + int i; + + gsbase_offset = 0; + if ((rc = pthread_key_create(&key, NULL))) return NULL; + + pthread_setspecific(key, sentinel1); + + for (i = key + 1; i < 2000; i++) /* arbitrary limit */ + { + if (p[i] == sentinel1) + { + pthread_setspecific(key, sentinel2); + + if (p[i] == sentinel2) + { + gsbase_offset = (i - key) * sizeof(*p); + break; + } + + pthread_setspecific(key, sentinel1); + } + } + + pthread_key_delete(key); + } + + if (gsbase_offset) return (char*)pthread_self() + gsbase_offset; + return NULL; +} +#endif + + /*********************************************************************** * handle_interrupt * @@ -2420,62 +2475,6 @@ void signal_free_thread( TEB *teb ) } }
-#ifdef __APPLE__ -/********************************************************************** - * mac_thread_gsbase - */ -static void *mac_thread_gsbase(void) -{ - struct thread_identifier_info tiinfo; - unsigned int info_count = THREAD_IDENTIFIER_INFO_COUNT; - static int gsbase_offset = -1; - - mach_port_t self = mach_thread_self(); - kern_return_t kr = thread_info(self, THREAD_IDENTIFIER_INFO, (thread_info_t) &tiinfo, &info_count); - mach_port_deallocate(mach_task_self(), self); - - if (kr == KERN_SUCCESS) return (void*)tiinfo.thread_handle; - - if (gsbase_offset < 0) - { - /* Search for the array of TLS slots within the pthread data structure. - That's what the macOS pthread implementation uses for gsbase. */ - const void* const sentinel1 = (const void*)0x2bffb6b4f11228ae; - const void* const sentinel2 = (const void*)0x0845a7ff6ab76707; - int rc; - pthread_key_t key; - const void** p = (const void**)pthread_self(); - int i; - - gsbase_offset = 0; - if ((rc = pthread_key_create(&key, NULL))) return NULL; - - pthread_setspecific(key, sentinel1); - - for (i = key + 1; i < 2000; i++) /* arbitrary limit */ - { - if (p[i] == sentinel1) - { - pthread_setspecific(key, sentinel2); - - if (p[i] == sentinel2) - { - gsbase_offset = (i - key) * sizeof(*p); - break; - } - - pthread_setspecific(key, sentinel1); - } - } - - pthread_key_delete(key); - } - - if (gsbase_offset) return (char*)pthread_self() + gsbase_offset; - return NULL; -} -#endif -
/********************************************************************** * signal_init_process
From: William Horvath william@horvath.blog
Adapted from check_invalid_gs in signal_i386.c. This prevents crashing when the %gs register is manipulated in 32-bit code on "new-style" WoW64.
The SYSCALL_HAVE_WRFSGSBASE fast-path for getting/setting the segment registers on Linux is already checked in signal_init_process, and the other platforms' implementations follow existing conventions.
Also note that we check in the TRAP_x86_PAGEFLT case because the error occurs when the invalid %gs value is actually used, i.e. by an internal call to NtCurrentTeb().
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=57444 --- dlls/ntdll/unix/signal_x86_64.c | 67 +++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 7667e96265b..5d3536b01a4 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2022,6 +2022,68 @@ static BOOL handle_syscall_trap( ucontext_t *sigcontext, siginfo_t *siginfo ) }
+/*********************************************************************** + * check_invalid_gs + * + * Check for fault caused by invalid %gs value (some copy protection schemes mess with it). + * + */ +static inline BOOL check_invalid_gs( ucontext_t *sigcontext, CONTEXT *context ) +{ + const BYTE *instr = (BYTE *)context->Rip; + WORD system_gs = ds64_sel; + ULONG_PTR cur_gs = 0; + TEB *teb; + +#ifndef __APPLE__ /* FIXME: why does this break on macOS? */ + /* only handle faults in system libraries */ + if (virtual_is_valid_code_address( instr, 1 )) return FALSE; +#endif + + teb = get_current_teb(); + +#ifdef __linux__ + if (syscall_flags & SYSCALL_HAVE_WRFSGSBASE) + __asm__ volatile ("rdgsbase %0" : "=r" (cur_gs)); + else + cur_gs = arch_prctl( ARCH_GET_GS, teb ); +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + amd64_get_gsbase( &cur_gs ); +#elif defined(__NetBSD__) + sysarch( X86_64_GET_GSBASE, &cur_gs ); +#elif defined(__APPLE__) + cur_gs = mac_thread_gsbase(); +#else +# error Please define getting %gs for your architecture +#endif + + if ((WORD)cur_gs == system_gs) return FALSE; + + TRACE( "%04x/%04x at %p, fixing up\n", (WORD)cur_gs, system_gs, instr ); + +#ifdef __linux__ + if (syscall_flags & SYSCALL_HAVE_WRFSGSBASE) + __asm__ volatile ("wrgsbase %0" :: "r" (teb)); + else + arch_prctl( ARCH_SET_GS, teb ); +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + amd64_set_gsbase( teb ); +#elif defined(__NetBSD__) + sysarch( X86_64_SET_GSBASE, &teb ); +#elif defined(__APPLE__) + __asm__ volatile ("movq %0,%%gs:%c1" :: "r" (teb->Tib.Self), + "n" (FIELD_OFFSET(TEB, Tib.Self))); + __asm__ volatile ("movq %0,%%gs:%c1" :: "r" (teb->ThreadLocalStoragePointer), + "n" (FIELD_OFFSET(TEB, ThreadLocalStoragePointer))); +#else +# error Please define setting %gs for your architecture +#endif + + GS_sig(sigcontext) = system_gs; + return TRUE; +} + + /********************************************************************** * segv_handler * @@ -2080,6 +2142,11 @@ static void segv_handler( int signal, siginfo_t *siginfo, void *sigcontext ) /* send EXCEPTION_EXECUTE_FAULT only if data execution prevention is enabled */ if (!(flags & MEM_EXECUTE_OPTION_DISABLE)) rec.ExceptionInformation[0] = EXCEPTION_READ_FAULT; } + if (is_wow64() && check_invalid_gs( sigcontext, &context.c )) + { + leave_handler( ucontext ); + return; + } break; case TRAP_x86_ALIGNFLT: /* Alignment check exception */ if (EFL_sig(ucontext) & 0x00040000)
Jinoh Kang (@iamahuman) commented about dlls/ntdll/unix/signal_x86_64.c:
+/***********************************************************************
check_invalid_gs
- Check for fault caused by invalid %gs value (some copy protection schemes mess with it).
- */
+static inline BOOL check_invalid_gs( ucontext_t *sigcontext, CONTEXT *context ) +{
- const BYTE *instr = (BYTE *)context->Rip;
- WORD system_gs = ds64_sel;
- ULONG_PTR cur_gs = 0;
- TEB *teb;
+#ifndef __APPLE__ /* FIXME: why does this break on macOS? */
- /* only handle faults in system libraries */
- if (virtual_is_valid_code_address( instr, 1 )) return FALSE;
First, thanks for taking care of preserving the system code check.
I'm afraid that this check may be too strict for WoW64; we need a bit more precision.
In pure 32bit (signal_i386.c), checking for system code (Unix side[^em]) was enough, because PE side[^em] didn't rely on the GS segment register at all.
In new-style WoW64, we have another layer that uses GS: the WoW64 itself. The new-style WoW64 code itself is 64-bit, so it needs GS. However, the WoW64 code is in the PE side[^em]. Therefore, `virtual_is_valid_code_address` would return TRUE, leading to early exit of this function (on both macOS and Linux).
| Region | Bitness | System code? | Uses GS? | Behavior of this PR (v3) | |---|---|---|---|---| | 64-bit Unix libraries | 64-bit | Yes (Unix) | Yes | Handled | | 32-bit PE (DLL/EXE) | 32-bit | No (PE) | No | Ignored | | WoW64 layer[^w] | 64-bit | **No (PE)** | **Yes** | **Ignored** |
As you can see in the last row, you need extra check for the WoW64 layer, even if `virtual_is_valid_code_address` returns TRUE. Otherwise, it leads to crash.
[^em]: See https://list.winehq.org/mailman3/hyperkitty/list/wine-devel@winehq.org/threa... and https://list.winehq.org/mailman3/hyperkitty/list/wine-devel@winehq.org/threa... for overview on the PE/Unix split architecture. [^w]: This includes wow64.dll, wow64cpu.dll, wow64win.dll, and even ntdll.dll (yes, every new-style WoW64 process haa both 32-bit and 64-bit versions of NTDLL!)
On Sun Dec 22 16:16:59 2024 +0000, Jinoh Kang wrote:
First, thanks for taking care of preserving the system code check. I'm afraid that this check may be too strict for WoW64; we need a bit more precision. In pure 32bit (signal_i386.c), checking for system code (Unix side[^em]) was enough, because PE side[^em] didn't rely on the GS segment register at all. In new-style WoW64, we have another layer that uses GS: the WoW64 itself. The new-style WoW64 code itself is 64-bit, so it needs GS. However, the WoW64 code is in the PE side[^em]. Therefore, `virtual_is_valid_code_address` would return TRUE, leading to early exit of this function (on both macOS and Linux). | Region | Bitness | System code? | Uses GS? | Behavior of this PR (v3) | |---|---|---|---|---| | 64-bit Unix libraries | 64-bit | Yes (Unix) | Yes | Handled | | 32-bit PE (DLL/EXE) | 32-bit | No (PE) | No | Ignored | | WoW64 layer[^w] | 64-bit | **No (PE)** | **Yes** | **Ignored** | As you can see in the last row, you need extra check for the WoW64 layer, even if `virtual_is_valid_code_address` returns TRUE. Otherwise, it leads to crash. [^em]: See https://list.winehq.org/mailman3/hyperkitty/list/wine-devel@winehq.org/threa... and https://list.winehq.org/mailman3/hyperkitty/list/wine-devel@winehq.org/threa... for overview on the PE/Unix split architecture. [^w]: This includes wow64.dll, wow64cpu.dll, wow64win.dll, and even ntdll.dll (yes, every new-style WoW64 process haa both 32-bit and 64-bit versions of NTDLL!)
Actually, this probably needs to be handled in wow64cpu, not ntdll itself. That would make this PR unnecessary, unless I'm missing something.
On Sun Dec 22 16:16:59 2024 +0000, Jinoh Kang wrote:
First, thanks for taking care of preserving the system code check. I'm afraid that this check may be too strict for WoW64; we need a bit more precision. In pure 32bit (signal_i386.c), checking for system code (Unix side[^em]) was enough, because PE side[^em] didn't rely on the GS segment register at all. In new-style WoW64, we have another layer that uses GS: the WoW64 itself. The new-style WoW64 code itself is 64-bit, so it needs GS. However, the WoW64 code is in the PE side[^em]. Therefore, `virtual_is_valid_code_address` would return TRUE, leading to early exit of this function (on both macOS and Linux). | Region | Bitness | System code? | Uses GS? | Behavior of this PR (v3) | |---|---|---|---|---| | 64-bit Unix libraries | 64-bit | Yes (Unix) | Yes | Handled | | 32-bit PE (DLL/EXE) | 32-bit | No (PE) | No | Ignored | | WoW64 layer[^w] | 64-bit | **No (PE)** | **Yes** | **Ignored** | As you can see in the last row, you need extra check for the WoW64 layer, even if `virtual_is_valid_code_address` returns TRUE. Otherwise, it leads to crash. [^em]: See https://list.winehq.org/mailman3/hyperkitty/list/wine-devel@winehq.org/threa... and https://list.winehq.org/mailman3/hyperkitty/list/wine-devel@winehq.org/threa... for overview on the PE/Unix split architecture. [^w]: This includes wow64.dll, wow64cpu.dll, wow64win.dll, and even ntdll.dll (yes, every new-style WoW64 process haa both 32-bit and 64-bit versions of NTDLL!)
I think the easiest way to achieve this is to check if the current bitness is 64bit (also makes `__APPLE__` check unnecessary)
```suggestion:-2+1 /* only handle faults in 64bit code */ if (CS_sig(sigcontext) != cs64_sel) return FALSE; ```