From: Rémi Bernon rbernon@codeweavers.com
When a syscall skips the FPU save in the syscall dispatcher, use the signal context for the volatile FPU state, assuming that it hasn't been modified by the unix call. --- dlls/ntdll/unix/signal_i386.c | 61 ++++++++++++++++++++++++--- dlls/ntdll/unix/signal_x86_64.c | 74 ++++++++++++++++++++++++++++++--- 2 files changed, 125 insertions(+), 10 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index f2c9982ce0f..262a0b8072f 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -493,6 +493,7 @@ C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, sysca #define SYSCALL_HAVE_XSAVE 1 #define SYSCALL_HAVE_XSAVEC 2 #define SYSCALL_HAVE_FXSAVE 4 +#define SYSCALL_NEED_XSTATE 8
static unsigned int syscall_flags;
@@ -741,6 +742,36 @@ static inline void restore_fpu( const CONTEXT *context ) }
+/*********************************************************************** + * save_context_xstate + * + * Set the fltsave and xstate values from a sigcontext. + */ +static void save_context_xstate( struct xcontext *xcontext, const ucontext_t *sigcontext ) +{ + FLOATING_SAVE_AREA *fpu = FPU_sig(sigcontext); + XSAVE_FORMAT *fpux = FPUX_sig(sigcontext); + CONTEXT *context = &xcontext->c; + + if (fpu) + { + context->ContextFlags |= CONTEXT_FLOATING_POINT; + context->FloatSave = *fpu; + } + if (fpux) + { + XSTATE *xs; + + context->ContextFlags |= CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; + memcpy( context->ExtendedRegisters, fpux, sizeof(*fpux) ); + if (!fpu) fpux_to_fpu( &context->FloatSave, fpux ); + if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(fpux))) + *xstate_from_context( context ) = *xs; + } + if (!fpu && !fpux) save_fpu( context ); +} + + /*********************************************************************** * save_context * @@ -1046,7 +1077,11 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) } if (needed_flags & CONTEXT_FLOATING_POINT) { - if (!(cpu_info.ProcessorFeatureBits & CPU_FEATURE_FXSR)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + /* nothing to do, already copied from sigcontext */ + } + else if (!(cpu_info.ProcessorFeatureBits & CPU_FEATURE_FXSR)) { context->FloatSave = frame->u.fsave; } @@ -1066,8 +1101,11 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) { XSAVE_FORMAT *xs = (XSAVE_FORMAT *)context->ExtendedRegisters;
- if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + /* nothing to do, already copied from sigcontext */ + } + else if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( xs, &frame->u.xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( xs->FloatRegisters, frame->u.xsave.FloatRegisters, @@ -1080,7 +1118,12 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) xs->ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + memcpy( xs->XmmRegisters + 6, frame->u.xsave.XmmRegisters + 6, + sizeof( frame->u.xsave.XmmRegisters ) - 6 * sizeof(*frame->u.xsave.XmmRegisters) ); + } + else if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( xs->XmmRegisters, frame->u.xsave.XmmRegisters, sizeof( xs->XmmRegisters )); xs->MxCsr = frame->u.xsave.MxCsr; @@ -1095,7 +1138,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
context->ContextFlags |= CONTEXT_EXTENDED_REGISTERS; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && + !(frame->syscall_flags & SYSCALL_NEED_XSTATE)) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1595,6 +1639,7 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "movl %ebp,0x380(%esp)\n\t" "movl 0x1f8(%edx),%ecx\n\t" /* x86_thread_data()->syscall_frame */ "movl (%ecx),%eax\n\t" /* frame->syscall_flags */ + "andl $~8,%eax\n\t" /* syscall_flags &= ~SYSCALL_NEED_XSTATE */ "movl %eax,(%esp)\n\t" "movl 0x38(%ecx),%eax\n\t" /* frame->syscall_table */ "movl %eax,0x38(%esp)\n\t" @@ -2077,6 +2122,7 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) DECLSPEC_ALIGN(64) XSTATE xs; xcontext.c.ContextFlags = CONTEXT_FULL; context_init_xstate( &xcontext.c, &xs ); + save_context_xstate( &xcontext, sigcontext );
NtGetContextThread( GetCurrentThread(), &xcontext.c ); wait_suspend( &xcontext.c ); @@ -2570,6 +2616,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "shrl $8,%ebx\n\t" "andl $0x30,%ebx\n\t" /* syscall table number */ "addl 0x38(%ecx),%ebx\n\t" /* frame->syscall_table */ + "testl $0x80000000,%eax\n\t" + "jnz .L__wine_syscall_dispatcher_nofpu_save\n\t" "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ "jz .L__wine_syscall_dispatcher_no_xsave\n\t" "movl $7,%eax\n\t" @@ -2605,6 +2653,9 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "\n.L__wine_syscall_dispatcher_no_fxsave:\n\t" "fnsave 0x40(%ecx)\n\t" "fwait\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_nofpu_save:\n\t" + "orl $8,(%ecx)\n\t" /* frame->syscall_flags |= SYSCALL_NEED_XSTATE */ "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" "movl %ecx,%esp\n\t" "movl 0x1c(%esp),%edx\n\t" /* frame->eax */ diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index eeba97d5de9..d123e381882 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -364,6 +364,7 @@ C_ASSERT( sizeof(struct stack_layout) == 0x590 ); /* Should match the size in ca #define SYSCALL_HAVE_XSAVEC 2 #define SYSCALL_HAVE_PTHREAD_TEB 4 #define SYSCALL_HAVE_WRFSGSBASE 8 +#define SYSCALL_NEED_XSTATE 0x10
static unsigned int syscall_flags;
@@ -823,6 +824,28 @@ static inline void leave_handler( const ucontext_t *sigcontext ) }
+/*********************************************************************** + * save_context_xstate + * + * Set the fltsave and xstate values from a sigcontext. + */ +static void save_context_xstate( struct xcontext *xcontext, const ucontext_t *sigcontext ) +{ + CONTEXT *context = &xcontext->c; + + if (FPU_sig(sigcontext)) + { + XSTATE *xs; + + context->ContextFlags |= CONTEXT_FLOATING_POINT; + context->u.FltSave = *FPU_sig(sigcontext); + context->MxCsr = context->u.FltSave.MxCsr; + if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + *xstate_from_context( context ) = *xs; + } +} + + /*********************************************************************** * save_context * @@ -1113,8 +1136,11 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) } if (needed_flags & CONTEXT_FLOATING_POINT) { - if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + /* nothing to do, already copied from sigcontext */ + } + else if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( &context->u.FltSave, &frame->xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( context->u.FltSave.FloatRegisters, frame->xsave.FloatRegisters, @@ -1128,7 +1154,12 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->u.FltSave.ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + memcpy( context->u.FltSave.XmmRegisters + 6, frame->xsave.XmmRegisters + 6, + sizeof( frame->xsave.XmmRegisters ) - 6 * sizeof(*frame->xsave.XmmRegisters) ); + } + else if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( context->u.FltSave.XmmRegisters, frame->xsave.XmmRegisters, sizeof( context->u.FltSave.XmmRegisters )); @@ -1146,7 +1177,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->MxCsr = context->u.FltSave.MxCsr; context->ContextFlags |= CONTEXT_FLOATING_POINT; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && + !(frame->syscall_flags & SYSCALL_NEED_XSTATE)) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1609,6 +1641,7 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "movq 0xa8(%r10),%rax\n\t" /* prev_frame->syscall_table */ "movq %rax,0xa8(%rsp)\n\t" /* frame->syscall_table */ "movl 0xb0(%r10),%r14d\n\t" /* prev_frame->syscall_flags */ + "andl $~0x10,%r14d\n\t" /* syscall_flags &= ~SYSCALL_NEED_XSTATE */ "movl %r14d,0xb0(%rsp)\n\t" /* frame->syscall_flags */ "movq %r10,0xa0(%rsp)\n\t" /* frame->prev_frame */ "movq %rsp,0x328(%r11)\n\t" /* amd64_thread_data()->syscall_frame */ @@ -2152,6 +2185,7 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *ucontext ) DECLSPEC_ALIGN(64) XSTATE xs; context.c.ContextFlags = CONTEXT_FULL; context_init_xstate( &context.c, &xs ); + save_context_xstate( &context, ucontext );
NtGetContextThread( GetCurrentThread(), &context.c ); wait_suspend( &context.c ); @@ -2648,6 +2682,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, * depends on us returning to it. Adjust the return address accordingly. */ "subq $0xb,0x70(%rcx)\n\t" "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ + "testl $0x80000000,%eax\n\t" + "jnz .L__wine_syscall_dispatcher_nofpu_save\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz .L__wine_syscall_dispatcher_no_xsave\n\t" "movl $7,%eax\n\t" @@ -2669,6 +2705,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" "\n.L__wine_syscall_dispatcher_no_xsave:\n\t" "fxsave64 0xc0(%rcx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_nofpu_save:\n\t" + "orl $0x10,%r14d\n\t" /* SYSCALL_NEED_XSTATE */ + "movl %r14d,0xb0(%rcx)\n\t" /* frame->syscall_flags |= SYSCALL_NEED_XSTATE */ + "movdqa %xmm6,0x1c0(%rcx)\n\t" + "movdqa %xmm7,0x1d0(%rcx)\n\t" + "movdqa %xmm8,0x1e0(%rcx)\n\t" + "movdqa %xmm9,0x1f0(%rcx)\n\t" + "movdqa %xmm10,0x200(%rcx)\n\t" + "movdqa %xmm11,0x210(%rcx)\n\t" + "movdqa %xmm12,0x220(%rcx)\n\t" + "movdqa %xmm13,0x230(%rcx)\n\t" + "movdqa %xmm14,0x240(%rcx)\n\t" + "movdqa %xmm15,0x250(%rcx)\n\t" "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" /* remember state when $rcx is pointing to "frame" */ __ASM_CFI(".cfi_remember_state\n\t") @@ -2739,7 +2789,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "1:\n\t" #endif "testl $0x48,%edx\n\t" /* CONTEXT_FLOATING_POINT | CONTEXT_XSTATE */ - "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" + "jz .L__wine_syscall_dispatcher_nofpu_restore\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz .L__wine_syscall_dispatcher_no_xrstor\n\t" "movq %rax,%r11\n\t" @@ -2751,6 +2801,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" "\n.L__wine_syscall_dispatcher_no_xrstor:\n\t" "fxrstor64 0xc0(%rcx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" + "\n.L__wine_syscall_dispatcher_nofpu_restore:\n\t" + "testl $0x10,%r14d\n\t" /* SYSCALL_NEED_XSTATE */ + "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" + "movdqa 0x1c0(%rcx),%xmm6\n\t" + "movdqa 0x1d0(%rcx),%xmm7\n\t" + "movdqa 0x1e0(%rcx),%xmm8\n\t" + "movdqa 0x1f0(%rcx),%xmm9\n\t" + "movdqa 0x200(%rcx),%xmm10\n\t" + "movdqa 0x210(%rcx),%xmm11\n\t" + "movdqa 0x220(%rcx),%xmm12\n\t" + "movdqa 0x230(%rcx),%xmm13\n\t" + "movdqa 0x240(%rcx),%xmm14\n\t" + "movdqa 0x250(%rcx),%xmm15\n\t" "\n.L__wine_syscall_dispatcher_fpu_restored:\n\t" "movq 0x98(%rcx),%rbp\n\t" __ASM_CFI(".cfi_same_value rbp\n\t")