1. For %fs/fsbase the patch follows the macOS logic for LDT descriptor registration and the Linux logic for switching. One notable difference is that on the 32->64 transition we set %fs to GSEL(GUFS32_SEL, SEL_UPL) before restoring fsbase; otherwise FreeBSD will just revert it by reloading the selector [at the first opportunity](https://github.com/freebsd/freebsd-src/blob/5673462af5330df20733008beae48a24...). GSEL(GUFS32_SEL, SEL_UPL) is the default %fs value on FreeBSD and is special-cased to save/restore the actual fsbase value to/from the PCB. 2. I was told we could get rid of fsbase glitches in signal handlers by blocking signals with [sigfastblock(2)](https://man.freebsd.org/cgi/man.cgi?query=sigfastblock) between the %fs reset to the default value and the fsbase reset to pthread_teb. This is currently part of the internal API of libthr, which could be exposed as pthread_signal_block_np for Wine. I'm on the fence about whether it's worth it. 3. I fully admit I have no idea which registers are worth preserving around the fallback sysarch(AMD64_SET_FSBASE) syscalls and whether it's appropriate to push those registers to the stack. ("Kernel" stack should be fine, I assume?) Syscalls definitely clobber r8-r11. 4. For %ss see https://lkml.org/lkml/2015/4/24/216. FreeBSD doesn't have a similar workaround in the kernel, so it goes into Wine.
-- v4: ntdll: Unbreak new wow64 mode on FreeBSD.
From: Alexander Shaikhulin iwtcex@gmail.com
This mostly involves fs/fsbase micromanagement. --- dlls/ntdll/unix/signal_x86_64.c | 147 ++++++++++++++++++++++++++++++-- 1 file changed, 138 insertions(+), 9 deletions(-)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index dc07c1778ea..6c199d31df4 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -152,6 +152,9 @@ __ASM_GLOBAL_FUNC( alloc_fs_sel,
#elif defined(__FreeBSD__) || defined (__FreeBSD_kernel__)
+#include <machine/cpufunc.h> +#include <machine/segments.h> +#include <machine/specialreg.h> #include <machine/trap.h>
#define RAX_sig(context) ((context)->uc_mcontext.mc_rax) @@ -469,7 +472,7 @@ static inline struct amd64_thread_data *amd64_thread_data(void) return (struct amd64_thread_data *)ntdll_get_thread_data()->cpu_data; }
-#if defined(__linux__) || defined(__APPLE__) +#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) static inline TEB *get_current_teb(void) { unsigned long rsp; @@ -1654,7 +1657,7 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "movq %r15,%rcx\n" /* func */ /* switch to user stack */ "1:\tmovq %rdi,%rsp\n\t" /* user_rsp */ -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) "testl $4,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB */ "jz 1f\n\t" "movw 0x338(%r13),%fs\n" /* amd64_thread_data()->fs */ @@ -2375,6 +2378,16 @@ static void ldt_set_entry( WORD sel, LDT_ENTRY entry )
#if defined(__APPLE__) if (i386_set_ldt(index, (union ldt_entry *)&entry, 1) < 0) perror("i386_set_ldt"); +#elif defined(__FreeBSD__) + struct i386_ldt_args p; + p.start = index; + p.descs = (struct user_segment_descriptor *)&entry; + p.num = 1; + if (sysarch(I386_SET_LDT, &p) == -1) + { + perror("i386_set_ldt"); + exit(1); + } #else fprintf( stderr, "No LDT support on this platform\n" ); exit(1); @@ -2485,6 +2498,38 @@ static void *mac_thread_gsbase(void) } #endif
+#ifdef __FreeBSD__ +static __siginfohandler_t *libthr_signal_handlers[_SIG_MAXSIG]; + +/* occasionally signals happen right between %fs reset to GUFS32_SEL and fsbase correction, +which results in fsbase being wrong on handler entry; we'll just restore fsbase ourselves */ +static void libthr_sighandler_wrapper(int sig, siginfo_t *info, void *_ucp) { + struct ntdll_thread_data *thread_data = (struct ntdll_thread_data *)&get_current_teb()->GdiTebBatch; + amd64_set_fsbase(((struct amd64_thread_data *)thread_data->cpu_data)->pthread_teb); + libthr_signal_handlers[sig - 1](sig, info, _ucp); +} + +extern int __sys_sigaction(int, const struct sigaction *, struct sigaction *); + +static int wrap_libthr_signal_handlers(void) { + struct sigaction act; + int sig; + + for (sig = 1; sig <= _SIG_MAXSIG; sig++) { + + if (__sys_sigaction(sig, NULL, &act) == -1) return -1; + if (act.sa_sigaction != NULL) { + + libthr_signal_handlers[sig - 1] = act.sa_sigaction; + act.sa_sigaction = libthr_sighandler_wrapper; + + if (__sys_sigaction(sig, &act, NULL) == -1) return -1; + } + } + + return 0; +} +#endif
/********************************************************************** * signal_init_process @@ -2550,6 +2595,37 @@ void signal_init_process(void) break; } } +#elif defined(__FreeBSD__) + if (wow_teb) + { + u_int p[4]; + u_int cpu_stdext_feature; + + LDT_ENTRY fs32_entry = ldt_make_entry(wow_teb, page_size - 1, LDT_FLAGS_DATA | LDT_FLAGS_32BIT); + + cs32_sel = GSEL(GUCODE32_SEL, SEL_UPL); + + amd64_thread_data()->fs = LSEL(first_ldt_entry, SEL_UPL); + ldt_set_entry(amd64_thread_data()->fs, fs32_entry); + + syscall_flags |= SYSCALL_HAVE_PTHREAD_TEB; + + do_cpuid(0, p); + if (p[0] >= 7) + { + cpuid_count(7, 0, p); + cpu_stdext_feature = p[1]; + } + else + { + cpu_stdext_feature = 0; + } + + if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) + { + syscall_flags |= SYSCALL_HAVE_WRFSGSBASE; + } + } #endif
sig_act.sa_mask = server_block_set; @@ -2574,6 +2650,9 @@ void signal_init_process(void) #ifdef __APPLE__ sig_act.sa_sigaction = sigsys_handler; if (sigaction( SIGSYS, &sig_act, NULL ) == -1) goto error; +#endif +#ifdef __FreeBSD__ + if (wrap_libthr_signal_handlers() == -1) goto error; #endif return;
@@ -2603,7 +2682,8 @@ void init_syscall_frame( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, arch_prctl( ARCH_GET_FS, &thread_data->pthread_teb ); if (fs32_sel) alloc_fs_sel( fs32_sel >> 3, get_wow_teb( teb )); #elif defined (__FreeBSD__) || defined (__FreeBSD_kernel__) - amd64_set_gsbase( teb ); + amd64_set_gsbase(teb); + amd64_get_fsbase(&thread_data->pthread_teb); #elif defined(__NetBSD__) sysarch( X86_64_SET_GSBASE, &teb ); #elif defined (__APPLE__) @@ -2805,17 +2885,36 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, /* When on the kernel stack, use %r13 instead of %gs to access the TEB. * (on macOS, signal handlers set gsbase to pthread_teb when on the kernel stack). */ -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) "testl $4,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB */ "jz 2f\n\t" +# ifdef __FreeBSD__ + "movq $0x13,%rsi\n\t" /* GSEL(GUFS32_SEL, SEL_UPL) */ + "movq %rsi,%fs\n\t" +# endif "movq 0x320(%r13),%rsi\n\t" /* amd64_thread_data()->pthread_teb */ "testl $8,%r14d\n\t" /* SYSCALL_HAVE_WRFSGSBASE */ "jz 1f\n\t" "wrfsbase %rsi\n\t" "jmp 2f\n" - "1:\tmov $0x1002,%edi\n\t" /* ARCH_SET_FS */ + "1:\n\t" +# ifdef __linux__ + "mov $0x1002,%edi\n\t" /* ARCH_SET_FS */ "mov $158,%eax\n\t" /* SYS_arch_prctl */ "syscall\n\t" +# elif defined(__FreeBSD__) + "pushq %r8\n\t" + "pushq %r9\n\t" + "pushq %r10\n\t" + "pushq %r11\n\t" + "movq $0xa5,%rax\n\t" /* sysarch */ + "movq $0x81,%rdi\n\t" /* AMD64_SET_FSBASE */ + "syscall\n\t" + "popq %r11\n\t" + "popq %r10\n\t" + "popq %r9\n\t" + "popq %r8\n\t" +# endif "leaq -0x98(%rbp),%rcx\n" "2:\n\t" #elif defined __APPLE__ @@ -2863,10 +2962,15 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI(".cfi_remember_state\n\t") __ASM_CFI_CFA_IS_AT2(rcx, 0xa8, 0x01) /* frame->syscall_cfa */ "leaq 0x70(%rcx),%rsp\n\t" /* %rsp > frame means no longer inside syscall */ -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) "testl $4,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB */ "jz 1f\n\t" "movw 
0x338(%r13),%fs\n" /* amd64_thread_data()->fs */ +# ifdef __FreeBSD__ + /* reset %ss (after sysret) for AMD */ + "movw $0x3b,%r14w\n\t" /* GSEL(GUDATA_SEL, SEL_UPL) */ + "movw %r14w,%ss\n\t" +# endif "1:\n\t" #elif defined __APPLE__ "movq %rax,%r8\n\t" @@ -3091,17 +3195,37 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, __ASM_CFI(".cfi_offset %r15,-0x38\n\t") __ASM_CFI(".cfi_undefined %rdi\n\t") __ASM_CFI(".cfi_undefined %rsi\n\t") -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) "testl $4,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB */ "jz 2f\n\t" +# ifdef __FreeBSD__ + "movq $0x13,%rsi\n\t" /* GSEL(GUFS32_SEL, SEL_UPL) */ + "movq %rsi,%fs\n\t" +# endif + "movq 0x320(%r13),%rsi\n\t" /* amd64_thread_data()->pthread_teb */ "testl $8,%r14d\n\t" /* SYSCALL_HAVE_WRFSGSBASE */ "jz 1f\n\t" "wrfsbase %rsi\n\t" "jmp 2f\n" - "1:\tmov $0x1002,%edi\n\t" /* ARCH_SET_FS */ + "1:\n\t" +# ifdef __linux__ + "mov $0x1002,%edi\n\t" /* ARCH_SET_FS */ "mov $158,%eax\n\t" /* SYS_arch_prctl */ "syscall\n\t" +# elif defined(__FreeBSD__) + "pushq %r8\n\t" + "pushq %r9\n\t" + "pushq %r10\n\t" + "pushq %r11\n\t" + "movq $0xa5,%rax\n\t" /* sysarch */ + "movq $0x81,%rdi\n\t" /* AMD64_SET_FSBASE */ + "syscall\n\t" + "popq %r11\n\t" + "popq %r10\n\t" + "popq %r9\n\t" + "popq %r8\n\t" +#endif "2:\n\t" #elif defined __APPLE__ "movq 0x320(%r13),%rdi\n\t" /* amd64_thread_data()->pthread_teb */ @@ -3127,10 +3251,15 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, /* switch to user stack */ "movq 0x88(%rcx),%rsp\n\t" __ASM_CFI(".cfi_restore_state\n\t") -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) "testl $4,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB */ "jz 1f\n\t" "movw 0x338(%r13),%fs\n" /* amd64_thread_data()->fs */ +# ifdef __FreeBSD__ + /* reset %ss (after sysret) for AMD */ + "movw $0x3b,%r14w\n\t" /* GSEL(GUDATA_SEL, SEL_UPL) */ + "movw %r14w,%ss\n\t" +# endif "1:\n\t" #elif defined __APPLE__ "movq %rax,%rdx\n\t"
Alexandre Julliard (@julliard) commented about dlls/ntdll/unix/signal_x86_64.c:
} #endif
+#ifdef __FreeBSD__ +static __siginfohandler_t *libthr_signal_handlers[_SIG_MAXSIG];
+/* occasionally signals happen right between %fs reset to GUFS32_SEL and fsbase correction, +which results in fsbase being wrong on handler entry; we'll just restore fsbase ourselves */ +static void libthr_sighandler_wrapper(int sig, siginfo_t *info, void *_ucp) {
- struct ntdll_thread_data *thread_data = (struct ntdll_thread_data *)&get_current_teb()->GdiTebBatch;
- amd64_set_fsbase(((struct amd64_thread_data *)thread_data->cpu_data)->pthread_teb);
- libthr_signal_handlers[sig - 1](sig, info, _ucp);
+}
Any reason you are not using `init_handler` for this?
Alexandre Julliard (@julliard) commented about dlls/ntdll/unix/signal_x86_64.c:
__ASM_CFI(".cfi_remember_state\n\t") __ASM_CFI_CFA_IS_AT2(rcx, 0xa8, 0x01) /* frame->syscall_cfa */ "leaq 0x70(%rcx),%rsp\n\t" /* %rsp > frame means no longer inside syscall */
-#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) "testl $4,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB */ "jz 1f\n\t" "movw 0x338(%r13),%fs\n" /* amd64_thread_data()->fs */ +# ifdef __FreeBSD__
/* reset %ss (after sysret) for AMD */
"movw $0x3b,%r14w\n\t" /* GSEL(GUDATA_SEL, SEL_UPL) */
"movw %r14w,%ss\n\t"
+# endif
I don't think you should be changing %r14 here. It's also not clear why resetting %ss is necessary, where does it get cleared?
Alexandre Julliard (@julliard) commented about dlls/ntdll/unix/signal_x86_64.c:
/* When on the kernel stack, use %r13 instead of %gs to access the TEB. * (on macOS, signal handlers set gsbase to pthread_teb when on the kernel stack). */
-#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) "testl $4,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB */ "jz 2f\n\t" +# ifdef __FreeBSD__
"movq $0x13,%rsi\n\t" /* GSEL(GUFS32_SEL, SEL_UPL) */
"movq %rsi,%fs\n\t"
+# endif "movq 0x320(%r13),%rsi\n\t" /* amd64_thread_data()->pthread_teb */
Please keep the Linux and FreeBSD case separate, unless they are truly identical.
It may be a good idea to split this in separate patches.
On Thu May 22 20:00:31 2025 +0000, Alexandre Julliard wrote:
Any reason you are not using `init_handler` for this?
As far as I understand it, libc's sigaction is normally redirected to __thr_sigaction, which wraps every signal handler, and that [wrapper](https://github.com/freebsd/freebsd-src/blob/3c3dd62966123b424657983d9a4d173f...) relies on TLS. Thus it's too late to set fsbase in init_handler. (Also, FreeBSD will do leave_handler's job itself, so there is no need to worry about that one.)
This is still a bit obnoxious, and we are actually discussing getting rid of that particular annoyance on the FreeBSD side. That would take some time, though.
On Thu May 22 20:00:31 2025 +0000, Alexandre Julliard wrote:
I don't think you should be changing %r14 here. It's also not clear why resetting %ss is necessary, where does it get cleared?
Do you mean my comment in the patch is unclear or the explanation in MR is unclear? Which one should I adjust? Without %ss reload (after a syscall), any AMD (Ryzen, ThreadRipper, etc) machine would crash with something like `addr = f7f0, signo = 11, code = 1, trapno = 12` (`RSP = 0x12f7f0`) as soon as it switches to the protected mode, meaning it operates in some kind of 16-bit mode for the stack (don't know what it is, don't care either).
On Thu May 22 20:59:42 2025 +0000, Alexander Shaikhulin wrote:
Do you mean my comment in the patch is unclear or the explanation in MR is unclear? Which one should I adjust? Without %ss reload (after a syscall), any AMD (Ryzen, ThreadRipper, etc) machine would crash with something like `addr = f7f0, signo = 11, code = 1, trapno = 12` (`RSP = 0x12f7f0`) as soon as it switches to the protected mode, meaning it operates in some kind of 16-bit mode for the stack (don't know what it is, don't care either).
(32-bit FreeBSD apps on amd64 don't run into this issue because they use int 0x80.)