This is preparation for supporting more xstate features in thread contexts.
The remaining parts are: - supporting arbitrary xstate in usr1_handler(); - supporting arbitrary xstate in the server contexts (probably transferring only the part that is present); - supporting arbitrary xstate in the context manipulation functions; - enabling more xstate features in system.c and in the user shared data.
-- v4: ntdll: Don't hardcode xstate size in exception stack layout. ntdll: Don't hardcode xstate size in syscall frame. ntdll: Don't hardcode xstate feature mask.
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 61 +++++++++++++++++---------------- dlls/ntdll/unix/signal_x86_64.c | 61 +++++++++++++++++++++------------ dlls/ntdll/unix/system.c | 4 +++ dlls/ntdll/unix/unix_private.h | 9 +++++ 4 files changed, 84 insertions(+), 51 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index cd6417b57b4..d634e46a80f 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -525,6 +525,7 @@ struct x86_thread_data UINT dr7; /* 1f0 */ SYSTEM_SERVICE_TABLE *syscall_table; /* 1f4 syscall table */ struct syscall_frame *syscall_frame; /* 1f8 frame pointer on syscall entry */ + UINT64 xstate_features_mask; /* 1fc */ };
C_ASSERT( sizeof(struct x86_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); @@ -609,8 +610,6 @@ struct xcontext ULONG64 host_compaction_mask; };
-extern BOOL xstate_compaction_enabled; - static inline XSTATE *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1); @@ -832,7 +831,7 @@ static inline void save_context( struct xcontext *xcontext, const ucontext_t *si context->ContextFlags |= CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memcpy( context->ExtendedRegisters, fpux, sizeof(*fpux) ); if (!fpu) fpux_to_fpu( &context->FloatSave, fpux ); - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(fpux))) + if (xstate_extended_features() && (xs = XState_sig(fpux))) { context_init_xstate( context, xs ); xcontext->host_compaction_mask = xs->CompactionMask; @@ -936,7 +935,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) DWORD flags = context->ContextFlags & ~CONTEXT_i386; BOOL self = (handle == GetCurrentThread());
- if ((flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -944,7 +943,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & XSTATE_MASK_GSSE) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1138,7 +1137,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
context->ContextFlags |= CONTEXT_EXTENDED_REGISTERS; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1148,7 +1147,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER;
- mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1485,7 +1484,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr,
context_init_xstate( &stack->context, dst_xs ); memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); - dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { dst_xs->Mask = 4; @@ -1587,7 +1586,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context
context_init_xstate( &stack->context, dst_xs ); memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); - dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { dst_xs->Mask = 4; @@ -2481,6 +2480,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB ldt_set_fs( thread_data->fs, teb ); thread_data->gs = get_gs(); thread_data->syscall_table = KeServiceDescriptorTable; + thread_data->xstate_features_mask = xstate_supported_features_mask;
context.SegCs = get_cs(); context.SegDs = get_ds(); @@ -2504,6 +2504,8 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB *ctx = context; ctx->ContextFlags = CONTEXT_FULL | CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memset( frame, 0, sizeof(*frame) ); + if (xstate_compaction_enabled) + frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx );
stack = (DWORD *)ctx; @@ -2605,26 +2607,27 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "addl %fs:0x1f4,%ebx\n\t" /* x86_thread_data()->syscall_table */ "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ "jz 2f\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" - "movl %edx,0x240(%ecx)\n\t" - "movl %edx,0x244(%ecx)\n\t" - "movl %edx,0x248(%ecx)\n\t" - "movl %edx,0x24c(%ecx)\n\t" - "movl %edx,0x250(%ecx)\n\t" - "movl %edx,0x254(%ecx)\n\t" + "movl %fs:0x1fc,%eax\n\t" /* x86_thread_data()->xstate_features_mask */ + "movl %fs:0x200,%edx\n\t" /* x86_thread_data()->xstate_features_mask high dword */ + "xorl %edi,%edi\n\t" + "movl %edi,0x240(%ecx)\n\t" + "movl %edi,0x244(%ecx)\n\t" + "movl %edi,0x248(%ecx)\n\t" + "movl %edi,0x24c(%ecx)\n\t" + "movl %edi,0x250(%ecx)\n\t" + "movl %edi,0x254(%ecx)\n\t" "testl $2,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" - "movl %edx,0x258(%ecx)\n\t" - "movl %edx,0x25c(%ecx)\n\t" - "movl %edx,0x260(%ecx)\n\t" - "movl %edx,0x264(%ecx)\n\t" - "movl %edx,0x268(%ecx)\n\t" - "movl %edx,0x26c(%ecx)\n\t" - "movl %edx,0x270(%ecx)\n\t" - "movl %edx,0x274(%ecx)\n\t" - "movl %edx,0x278(%ecx)\n\t" - "movl %edx,0x27c(%ecx)\n\t" + "movl %edi,0x258(%ecx)\n\t" + "movl %edi,0x25c(%ecx)\n\t" + "movl %edi,0x260(%ecx)\n\t" + "movl %edi,0x264(%ecx)\n\t" + "movl %edi,0x268(%ecx)\n\t" + "movl %edi,0x26c(%ecx)\n\t" + "movl %edi,0x270(%ecx)\n\t" + "movl %edi,0x274(%ecx)\n\t" + "movl %edi,0x278(%ecx)\n\t" + "movl %edi,0x27c(%ecx)\n\t" /* The xsavec instruction is not supported by * binutils < 2.25. 
*/ ".byte 0x0f, 0xc7, 0x61, 0x40\n\t" /* xsavec 0x40(%ecx) */ @@ -2669,8 +2672,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "testl $3,%ecx\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" "movl %eax,%esi\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" + "movl %fs:0x1fc,%eax\n\t" /* x86_thread_data()->xstate_features_mask */ + "movl %fs:0x200,%edx\n\t" /* x86_thread_data()->xstate_features_mask high dword */ "xrstor 0x40(%esp)\n\t" "movl %esi,%eax\n\t" "jmp 3f\n" diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 405ac49e5a3..aaa9ea0b638 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -441,6 +441,8 @@ struct amd64_thread_data struct syscall_frame *syscall_frame; /* 0328 syscall frame pointer */ SYSTEM_SERVICE_TABLE *syscall_table; /* 0330 syscall table */ DWORD fs; /* 0338 WOW TEB selector */ + DWORD align; + UINT64 xstate_features_mask; /* 0340 */ };
C_ASSERT( sizeof(struct amd64_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); @@ -448,6 +450,7 @@ C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, pth C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, syscall_frame ) == 0x328 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, syscall_table ) == 0x330 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, fs ) == 0x338 ); +C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, xstate_features_mask ) == 0x340 );
static inline struct amd64_thread_data *amd64_thread_data(void) { @@ -477,8 +480,6 @@ struct xcontext ULONG64 host_compaction_mask; };
-extern BOOL xstate_compaction_enabled; - static inline XSTATE *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1); @@ -898,7 +899,7 @@ static void save_context( struct xcontext *xcontext, const ucontext_t *sigcontex context->ContextFlags |= CONTEXT_FLOATING_POINT; context->FltSave = *FPU_sig(sigcontext); context->MxCsr = context->FltSave.MxCsr; - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + if (xstate_extended_features() && (xs = XState_sig(FPU_sig(sigcontext)))) { /* xcontext and sigcontext are both on the signal stack, so we can * just reference sigcontext without overflowing 32 bit XState.Offset */ @@ -928,7 +929,7 @@ static void restore_context( const struct xcontext *xcontext, ucontext_t *sigcon amd64_thread_data()->dr7 = context->Dr7; set_sigcontext( context, sigcontext ); if (FPU_sig(sigcontext)) *FPU_sig(sigcontext) = context->FltSave; - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + if (xstate_extended_features() && (xs = XState_sig(FPU_sig(sigcontext)))) xs->CompactionMask = xcontext->host_compaction_mask; leave_handler( sigcontext ); } @@ -977,7 +978,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) BOOL self = (handle == GetCurrentThread()); struct syscall_frame *frame = amd64_thread_data()->syscall_frame;
- if ((flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -985,7 +986,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & XSTATE_MASK_GSSE) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1155,7 +1156,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->MxCsr = context->FltSave.MxCsr; context->ContextFlags |= CONTEXT_FLOATING_POINT; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1165,7 +1166,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER;
- mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1377,7 +1378,7 @@ NTSTATUS get_thread_wow64_context( HANDLE handle, void *ctx, ULONG size ) context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER;
- mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1442,7 +1443,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec assert( !((ULONG_PTR)&stack->xstate & 63) ); context_init_xstate( &stack->context, &stack->xstate ); memset( &stack->xstate, 0, offsetof(XSTATE, YmmContext) ); - stack->xstate.CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + stack->xstate.CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { stack->xstate.Mask = 4; @@ -2479,6 +2480,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB I386_CONTEXT *wow_context;
thread_data->syscall_table = KeServiceDescriptorTable; + thread_data->xstate_features_mask = xstate_supported_features_mask;
#if defined __linux__ arch_prctl( ARCH_SET_GS, teb ); @@ -2539,6 +2541,8 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB *ctx = context; ctx->ContextFlags = CONTEXT_FULL; memset( frame, 0, sizeof(*frame) ); + if (xstate_compaction_enabled) + frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx );
frame->cs = cs64_sel; @@ -2636,18 +2640,25 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 2f\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" - "movq %rdx,0x2c0(%rcx)\n\t" - "movq %rdx,0x2c8(%rcx)\n\t" - "movq %rdx,0x2d0(%rcx)\n\t" +#ifdef __APPLE__ + "movq %gs:0x30,%rdx\n\t" + "movl 0x340(%rdx),%eax\n\t" + "movl 0x344(%rdx),%edx\n\t" +#else + "movl %gs:0x340,%eax\n\t" /* amd64_thread_data()->xstate_features_mask */ + "movl %gs:0x344,%edx\n\t" /* amd64_thread_data()->xstate_features_mask high dword */ +#endif + "xorq %rbp,%rbp\n\t" + "movq %rbp,0x2c0(%rcx)\n\t" + "movq %rbp,0x2c8(%rcx)\n\t" + "movq %rbp,0x2d0(%rcx)\n\t" "testl $2,%r14d\n\t" /* SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" - "movq %rdx,0x2d8(%rcx)\n\t" - "movq %rdx,0x2e0(%rcx)\n\t" - "movq %rdx,0x2e8(%rcx)\n\t" - "movq %rdx,0x2f0(%rcx)\n\t" - "movq %rdx,0x2f8(%rcx)\n\t" + "movq %rbp,0x2d8(%rcx)\n\t" + "movq %rbp,0x2e0(%rcx)\n\t" + "movq %rbp,0x2e8(%rcx)\n\t" + "movq %rbp,0x2f0(%rcx)\n\t" + "movq %rbp,0x2f8(%rcx)\n\t" /* The xsavec instruction is not supported by * binutils < 2.25. 
*/ ".byte 0x48, 0x0f, 0xc7, 0xa1, 0xc0, 0x00, 0x00, 0x00\n\t" /* xsavec64 0xc0(%rcx) */ @@ -2749,8 +2760,14 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "2:\ttestl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 3f\n\t" "movq %rax,%r11\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" +#ifdef __APPLE__ + "movq %gs:0x30,%rdx\n\t" + "movl 0x340(%rdx),%eax\n\t" + "movl 0x344(%rdx),%edx\n\t" +#else + "movl %gs:0x340,%eax\n\t" /* amd64_thread_data()->xstate_features_mask */ + "movl %gs:0x344,%edx\n\t" /* amd64_thread_data()->xstate_features_mask high dword */ +#endif "xrstor64 0xc0(%rcx)\n\t" "movq %r11,%rax\n\t" "movl 0xb4(%rcx),%edx\n\t" /* frame->restore_flags */ diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index cb99b7c2cc3..2c71cc67835 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -247,6 +247,7 @@ static pthread_mutex_t timezone_mutex = PTHREAD_MUTEX_INITIALIZER; #if defined(__i386__) || defined(__x86_64__)
BOOL xstate_compaction_enabled = FALSE; +UINT64 xstate_supported_features_mask;
#define AUTH 0x68747541 /* "Auth" */ #define ENTI 0x69746e65 /* "enti" */ @@ -396,6 +397,9 @@ static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) { do_cpuid( 0x0000000d, 1, regs3 ); /* get XSAVE details */ if (regs3[0] & 2) xstate_compaction_enabled = TRUE; + xstate_supported_features_mask = 3; + if (features & CPU_FEATURE_AVX) + xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX; }
if (regs[1] == AUTH && regs[3] == ENTI && regs[2] == CAMD) diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index cbf72651a9a..1311e5a1070 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -214,6 +214,15 @@ extern int server_pipe( int fd[2] );
extern void fpux_to_fpu( I386_FLOATING_SAVE_AREA *fpu, const XSAVE_FORMAT *fpux ); extern void fpu_to_fpux( XSAVE_FORMAT *fpux, const I386_FLOATING_SAVE_AREA *fpu ); + +extern BOOL xstate_compaction_enabled; +extern UINT64 xstate_supported_features_mask; + +static inline UINT64 xstate_extended_features(void) +{ + return xstate_supported_features_mask & ~(UINT64)3; +} + extern void *get_cpu_area( USHORT machine ); extern void set_thread_id( TEB *teb, DWORD pid, DWORD tid ); extern NTSTATUS init_thread_stack( TEB *teb, ULONG_PTR limit, SIZE_T reserve_size, SIZE_T commit_size );
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 53 ++++++++-------- dlls/ntdll/unix/signal_x86_64.c | 104 +++++++++++++++++++------------- dlls/ntdll/unix/system.c | 21 +++++++ dlls/ntdll/unix/unix_private.h | 3 + include/winnt.h | 8 +++ 5 files changed, 123 insertions(+), 66 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index d634e46a80f..25b2100af0a 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -508,10 +508,10 @@ struct syscall_frame /* Leave space for the whole set of YMM registers. They're not used in * 32-bit mode, but some processors fault if they're not in writable memory. */ - DECLSPEC_ALIGN(64) XSTATE xstate; /* 240 */ + DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate; /* 240 */ };
-C_ASSERT( sizeof(struct syscall_frame) == 0x380 ); +C_ASSERT( sizeof(struct syscall_frame) == 0x280 );
struct x86_thread_data { @@ -526,12 +526,14 @@ struct x86_thread_data SYSTEM_SERVICE_TABLE *syscall_table; /* 1f4 syscall table */ struct syscall_frame *syscall_frame; /* 1f8 frame pointer on syscall entry */ UINT64 xstate_features_mask; /* 1fc */ + UINT xstate_features_size; /* 204 */ };
C_ASSERT( sizeof(struct x86_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, gs ) == 0x1d8 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, syscall_table ) == 0x1f4 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, syscall_frame ) == 0x1f8 ); +C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, xstate_features_size ) == 0x204 );
/* flags to control the behavior of the syscall dispatcher */ #define SYSCALL_HAVE_XSAVE 1 @@ -938,12 +940,13 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) + && (context_ex->XState.Length < xstate_get_size( xs->CompactionMask, xs->Mask ))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1019,14 +1022,9 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (flags & CONTEXT_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - frame->xstate.YmmContext = xs->YmmContext; - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate, xs, xs->Mask ); }
frame->restore_flags |= flags & ~CONTEXT_INTEGER; @@ -1140,21 +1138,22 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) - || context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - xstate->YmmContext = frame->xstate.YmmContext; + if (context_ex->XState.Length < xstate_get_size( xstate->CompactionMask, xstate->Mask )) + return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate, xstate->Mask ); } } /* update the cached version of the debug registers */ @@ -1483,7 +1482,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr, XSTATE *dst_xs = (XSTATE *)(((ULONG_PTR)stack->xstate + 63) & ~63);
context_init_xstate( &stack->context, dst_xs ); - memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); + memset( dst_xs, 0, sizeof(XSAVE_AREA_HEADER) ); dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { @@ -1623,7 +1622,8 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, __ASM_CFI(".cfi_rel_offset %edi,-12\n\t") "movl 0x18(%ebp),%edx\n\t" /* teb */ "pushl 0(%edx)\n\t" /* teb->Tib.ExceptionList */ - "subl $0x380,%esp\n\t" /* sizeof(struct syscall_frame) */ + "subl $0x280,%esp\n\t" /* sizeof(struct syscall_frame) */ + "subl %fs:0x204,%esp\n\t" /* x86_thread_data()->xstate_features_size */ "andl $~63,%esp\n\t" "leal 8(%ebp),%eax\n\t" "movl %eax,0x38(%esp)\n\t" /* frame->syscall_cfa */ @@ -2403,6 +2403,7 @@ NTSTATUS signal_alloc_thread( TEB *teb ) else thread_data->fs = gdt_fs_sel;
teb->WOW32Reserved = __wine_syscall_dispatcher; + thread_data->xstate_features_size = xstate_features_size; return STATUS_SUCCESS; }
@@ -2431,7 +2432,9 @@ void signal_init_process(void) struct sigaction sig_act; void *kernel_stack = (char *)ntdll_get_thread_data()->kernel_stack + kernel_stack_size;
- x86_thread_data()->syscall_frame = (struct syscall_frame *)kernel_stack - 1; + x86_thread_data()->syscall_frame = (struct syscall_frame *)((ULONG_PTR)((char *)kernel_stack + - sizeof(struct syscall_frame) - xstate_features_size) & ~(ULONG_PTR)63); + x86_thread_data()->xstate_features_size = xstate_features_size;
if (cpu_info.ProcessorFeatureBits & CPU_FEATURE_FXSR) syscall_flags |= SYSCALL_HAVE_FXSAVE; if (cpu_info.ProcessorFeatureBits & CPU_FEATURE_XSAVE) syscall_flags |= SYSCALL_HAVE_XSAVE; @@ -2481,6 +2484,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB thread_data->gs = get_gs(); thread_data->syscall_table = KeServiceDescriptorTable; thread_data->xstate_features_mask = xstate_supported_features_mask; + assert( thread_data->xstate_features_size == xstate_features_size );
context.SegCs = get_cs(); context.SegDs = get_ds(); @@ -2547,7 +2551,8 @@ __ASM_GLOBAL_FUNC( signal_start_thread, "movl 0x1f8(%ecx),%eax\n\t" /* x86_thread_data()->syscall_frame */ "orl %eax,%eax\n\t" "jnz 1f\n\t" - "leal -0x380(%esp),%eax\n\t" /* sizeof(struct syscall_frame) */ + "leal -0x280(%esp),%eax\n\t" /* sizeof(struct syscall_frame) */ + "subl %fs:0x204,%eax\n\t" /* x86_thread_data()->xstate_features_size */ "andl $~63,%eax\n\t" "movl %eax,0x1f8(%ecx)\n" /* x86_thread_data()->syscall_frame */ /* switch to kernel stack */ diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index aaa9ea0b638..cf85db0d627 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -424,10 +424,12 @@ struct syscall_frame DWORD restore_flags; /* 00b4 */ DWORD align[2]; /* 00b8 */ XMM_SAVE_AREA32 xsave; /* 00c0 */ - DECLSPEC_ALIGN(64) XSTATE xstate; /* 02c0 */ + DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate; /* 02c0 */ };
-C_ASSERT( sizeof( struct syscall_frame ) == 0x400); +C_ASSERT( offsetof( struct syscall_frame, xsave ) == 0xc0 ); +C_ASSERT( offsetof( struct syscall_frame, xstate ) == 0x2c0 ); +C_ASSERT( sizeof( struct syscall_frame ) == 0x300);
struct amd64_thread_data { @@ -441,7 +443,7 @@ struct amd64_thread_data struct syscall_frame *syscall_frame; /* 0328 syscall frame pointer */ SYSTEM_SERVICE_TABLE *syscall_table; /* 0330 syscall table */ DWORD fs; /* 0338 WOW TEB selector */ - DWORD align; + DWORD xstate_features_size; /* 033c */ UINT64 xstate_features_mask; /* 0340 */ };
@@ -450,6 +452,7 @@ C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, pth C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, syscall_frame ) == 0x328 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, syscall_table ) == 0x330 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, fs ) == 0x338 ); +C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, xstate_features_size ) == 0x33c ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, xstate_features_mask ) == 0x340 );
static inline struct amd64_thread_data *amd64_thread_data(void) @@ -981,12 +984,13 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) + && (context_ex->XState.Length < xstate_get_size( xs->CompactionMask, xs->Mask ))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1051,14 +1055,9 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (flags & CONTEXT_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - memcpy( &frame->xstate.YmmContext, &xs->YmmContext, sizeof(xs->YmmContext) ); - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate, xs, xs->Mask ); }
frame->restore_flags |= flags & ~CONTEXT_INTEGER; @@ -1159,21 +1158,22 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) - || context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - memcpy( &xstate->YmmContext, &frame->xstate.YmmContext, sizeof(xstate->YmmContext) ); + if (context_ex->XState.Length < xstate_get_size( xstate->CompactionMask, xstate->Mask )) + return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate, xstate->Mask ); } } /* update the cached version of the debug registers */ @@ -1280,14 +1280,9 @@ NTSTATUS set_thread_wow64_context( HANDLE handle, const void *ctx, ULONG size ) if (flags & CONTEXT_I386_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - memcpy( &frame->xstate.YmmContext, &xs->YmmContext, sizeof(xs->YmmContext) ); - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate, xs, xs->Mask ); frame->restore_flags |= CONTEXT_XSTATE; } return STATUS_SUCCESS; @@ -1368,24 +1363,25 @@ NTSTATUS get_thread_wow64_context( HANDLE handle, void *ctx, ULONG size ) fpux_to_fpu( &context->FloatSave, &frame->xsave ); context->ContextFlags |= CONTEXT_I386_FLOATING_POINT; } - if ((needed_flags & CONTEXT_I386_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_I386_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - memcpy( &xstate->YmmContext, &frame->xstate.YmmContext, sizeof(xstate->YmmContext) ); + if (context_ex->XState.Length < xstate_get_size( xstate->CompactionMask, xstate->Mask )) + return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate, xstate->Mask ); } } return STATUS_SUCCESS; @@ -1544,7 +1540,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context { assert( !((ULONG_PTR)&stack->xstate & 63) ); context_init_xstate( &stack->context, &stack->xstate ); - memcpy( &stack->xstate, &frame->xstate, sizeof(frame->xstate) ); + memcpy( &stack->xstate, &frame->xstate, sizeof(XSAVE_AREA_HEADER) + xstate_features_size ); } else context_init_xstate( &stack->context, NULL );
@@ -1586,13 +1582,20 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "fnstcw -0x2c(%rbp)\n\t" "movq %rsi,-0x38(%rbp)\n\t" /* ret_ptr */ "movq %rdx,-0x40(%rbp)\n\t" /* ret_len */ - "subq $0x408,%rsp\n\t" /* sizeof(struct syscall_frame) + exception */ + "subq $0x308,%rsp\n\t" /* sizeof(struct syscall_frame) + exception */ +#ifdef __APPLE__ + "movq %gs:0x30,%rsi\n\t" + "movl 0x33c(%rsi),%esi\n\t" +#else + "movl %gs:0x33c,%esi\n\t" /* amd64_thread_data()->xstate_features_size */ +#endif + "subq %rsi,%rsp\n\t" "andq $~63,%rsp\n\t" "leaq 0x10(%rbp),%rax\n\t" "movq %rax,0xa8(%rsp)\n\t" /* frame->syscall_cfa */ "movq 0x328(%r8),%r10\n\t" /* amd64_thread_data()->syscall_frame */ "movq (%r8),%rax\n\t" /* NtCurrentTeb()->Tib.ExceptionList */ - "movq %rax,0x400(%rsp)\n\t" + "movq %rax,0x300(%rsp,%rsi)\n\t" "movl 0xb0(%r10),%r14d\n\t" /* prev_frame->syscall_flags */ "movl %r14d,0xb0(%rsp)\n\t" /* frame->syscall_flags */ "movq %r10,0xa0(%rsp)\n\t" /* frame->prev_frame */ @@ -1625,7 +1628,13 @@ __ASM_GLOBAL_FUNC( user_mode_callback_return, __ASM_CFI(".cfi_rel_offset %r13,-0x18\n\t") __ASM_CFI(".cfi_rel_offset %r14,-0x20\n\t") __ASM_CFI(".cfi_rel_offset %r15,-0x28\n\t") - "movq 0x400(%r10),%rax\n\t" /* exception list */ +#ifdef __APPLE__ + "movq %gs:0x30,%rax\n\t" + "movl 0x33c(%rax),%eax\n\t" +#else + "movl %gs:0x33c,%eax\n\t" /* amd64_thread_data()->xstate_features_size */ +#endif + "movq 0x300(%r10,%rax),%rax\n\t" /* exception list */ "movq %rax,0(%rcx)\n\t" /* teb->Tib.ExceptionList */ "movq -0x38(%rbp),%r10\n\t" /* ret_ptr */ "movq -0x40(%rbp),%r11\n\t" /* ret_len */ @@ -2303,6 +2312,7 @@ NTSTATUS signal_alloc_thread( TEB *teb ) } else thread_data->fs = fs32_sel; } + thread_data->xstate_features_size = xstate_features_size; return STATUS_SUCCESS; }
@@ -2389,7 +2399,9 @@ void signal_init_process(void) WOW_TEB *wow_teb = get_wow_teb( NtCurrentTeb() ); void *ptr, *kernel_stack = (char *)ntdll_get_thread_data()->kernel_stack + kernel_stack_size;
- amd64_thread_data()->syscall_frame = (struct syscall_frame *)kernel_stack - 1; + amd64_thread_data()->syscall_frame = (struct syscall_frame *)((ULONG_PTR)((char *)kernel_stack + - sizeof(struct syscall_frame) - xstate_features_size) & ~(ULONG_PTR)63); + amd64_thread_data()->xstate_features_size = xstate_features_size;
/* sneak in a syscall dispatcher pointer at a fixed address (7ffe1000) */ ptr = (char *)user_shared_data + page_size; @@ -2481,6 +2493,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB
thread_data->syscall_table = KeServiceDescriptorTable; thread_data->xstate_features_mask = xstate_supported_features_mask; + assert( thread_data->xstate_features_size == xstate_features_size );
#if defined __linux__ arch_prctl( ARCH_SET_GS, teb ); @@ -2585,7 +2598,14 @@ __ASM_GLOBAL_FUNC( signal_start_thread, "movq 0x328(%rcx),%r8\n\t" /* amd64_thread_data()->syscall_frame */ "orq %r8,%r8\n\t" "jnz 1f\n\t" - "leaq -0x400(%rsp),%r8\n\t" /* sizeof(struct syscall_frame) */ + "leaq -0x300(%rsp),%r8\n\t" /* sizeof(struct syscall_frame) */ +#ifdef __APPLE__ + "movq %gs:0x30,%rax\n\t" + "movl 0x33c(%rax),%eax\n\t" +#else + "movl %gs:0x33c,%eax\n\t" /* amd64_thread_data()->xstate_features_size */ +#endif + "subq %rax,%r8\n\t" "andq $~63,%r8\n\t" "movq %r8,0x328(%rcx)\n" /* amd64_thread_data()->syscall_frame */ /* switch to kernel stack */ diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index 2c71cc67835..72bdd6b31a5 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -248,6 +248,23 @@ static pthread_mutex_t timezone_mutex = PTHREAD_MUTEX_INITIALIZER;
BOOL xstate_compaction_enabled = FALSE; UINT64 xstate_supported_features_mask; +UINT64 xstate_features_size; + +unsigned int xstate_get_size( UINT64 compaction_mask, UINT64 mask ) +{ + if (!(mask & ((UINT64)1 << XSTATE_AVX))) return sizeof(XSAVE_AREA_HEADER); + return sizeof(XSAVE_AREA_HEADER) + sizeof(YMMCONTEXT); +} + +void copy_xstate( XSAVE_AREA_HEADER *dst, XSAVE_AREA_HEADER *src, UINT64 mask ) +{ + mask &= xstate_extended_features() & src->Mask; + if (src->CompactionMask) mask &= src->CompactionMask; + if (dst->CompactionMask) mask &= dst->CompactionMask; + dst->Mask = (dst->Mask & ~xstate_extended_features()) | mask; + if (mask & ((UINT64)1 << XSTATE_AVX)) + *(YMMCONTEXT *)(dst + 1) = *(YMMCONTEXT *)(src + 1); +}
#define AUTH 0x68747541 /* "Auth" */ #define ENTI 0x69746e65 /* "enti" */ @@ -400,6 +417,10 @@ static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) xstate_supported_features_mask = 3; if (features & CPU_FEATURE_AVX) xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX; + xstate_features_size = xstate_get_size( xstate_compaction_enabled ? 0x8000000000000000 + | xstate_supported_features_mask : 0, xstate_supported_features_mask ) + - sizeof(XSAVE_AREA_HEADER); + xstate_features_size = (xstate_features_size + 15) & ~15; }
if (regs[1] == AUTH && regs[3] == ENTI && regs[2] == CAMD) diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index 1311e5a1070..b249570d421 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -217,6 +217,9 @@ extern void fpu_to_fpux( XSAVE_FORMAT *fpux, const I386_FLOATING_SAVE_AREA *fpu
extern BOOL xstate_compaction_enabled; extern UINT64 xstate_supported_features_mask; +extern UINT64 xstate_features_size; +extern unsigned int xstate_get_size( UINT64 compaction_mask, UINT64 mask ); +extern void copy_xstate( XSAVE_AREA_HEADER *dst, XSAVE_AREA_HEADER *src, UINT64 mask );
static inline UINT64 xstate_extended_features(void) { diff --git a/include/winnt.h b/include/winnt.h index 620e70189be..9ca3a0ce64d 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -1483,6 +1483,14 @@ typedef struct _XSTATE_CONFIGURATION ULONG64 EnabledUserVisibleSupervisorFeatures; } XSTATE_CONFIGURATION, *PXSTATE_CONFIGURATION;
+typedef struct _XSAVE_AREA_HEADER +{ + DWORD64 Mask; + DWORD64 CompactionMask; + DWORD64 Reserved2[6]; +} +XSAVE_AREA_HEADER, *PXSAVE_AREA_HEADER; + typedef struct _YMMCONTEXT { M128A Ymm0;
From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/unix/signal_i386.c | 63 +++++++++++++++------------------ dlls/ntdll/unix/signal_x86_64.c | 47 ++++++++++++------------ 2 files changed, 53 insertions(+), 57 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 25b2100af0a..93bcf607f41 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -151,7 +151,7 @@ typedef struct ucontext
#define FPU_sig(context) ((FLOATING_SAVE_AREA*)((context)->uc_mcontext.fpregs)) #define FPUX_sig(context) (FPU_sig(context) && !((context)->uc_mcontext.fpregs->status >> 16) ? (XSAVE_FORMAT *)(FPU_sig(context) + 1) : NULL) -#define XState_sig(fpu) (((unsigned int *)fpu->Reserved4)[12] == FP_XSTATE_MAGIC1 ? (XSTATE *)(fpu + 1) : NULL) +#define XState_sig(fpu) (((unsigned int *)fpu->Reserved4)[12] == FP_XSTATE_MAGIC1 ? (XSAVE_AREA_HEADER *)(fpu + 1) : NULL)
#ifdef __ANDROID__ /* custom signal restorer since we may have unmapped the one in vdso, and bionic doesn't check for that */ @@ -443,12 +443,10 @@ struct exc_stack_layout EXCEPTION_RECORD rec; /* 008 */ CONTEXT context; /* 058 */ CONTEXT_EX context_ex; /* 324 */ - BYTE xstate[sizeof(XSTATE)+64]; /* 33c extra space to allow for 64-byte alignment */ - DWORD align; /* 4bc */ + DWORD align; /* 33c */ }; C_ASSERT( offsetof(struct exc_stack_layout, context) == 0x58 ); -C_ASSERT( offsetof(struct exc_stack_layout, xstate) == 0x33c ); -C_ASSERT( sizeof(struct exc_stack_layout) == 0x4c0 ); +C_ASSERT( sizeof(struct exc_stack_layout) == 0x340 );
/* stack layout when calling KiUserApcDispatcher */ struct apc_stack_layout @@ -612,12 +610,12 @@ struct xcontext ULONG64 host_compaction_mask; };
-static inline XSTATE *xstate_from_context( const CONTEXT *context ) +static inline XSAVE_AREA_HEADER *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1);
if ((context->ContextFlags & CONTEXT_XSTATE) != CONTEXT_XSTATE) return NULL; - return (XSTATE *)((char *)xctx + xctx->XState.Offset); + return (XSAVE_AREA_HEADER *)((char *)xctx + xctx->XState.Offset); }
static inline void context_init_xstate( CONTEXT *context, void *xstate_buffer ) @@ -630,7 +628,7 @@ static inline void context_init_xstate( CONTEXT *context, void *xstate_buffer )
if (xstate_buffer) { - xctx->XState.Length = sizeof(XSTATE); + xctx->XState.Length = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; xctx->XState.Offset = (BYTE *)xstate_buffer - (BYTE *)xctx; context->ContextFlags |= CONTEXT_XSTATE;
@@ -828,7 +826,7 @@ static inline void save_context( struct xcontext *xcontext, const ucontext_t *si } if (fpux) { - XSTATE *xs; + XSAVE_AREA_HEADER *xs;
context->ContextFlags |= CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memcpy( context->ExtendedRegisters, fpux, sizeof(*fpux) ); @@ -880,16 +878,12 @@ static inline void restore_context( const struct xcontext *xcontext, ucontext_t if (fpu) *fpu = context->FloatSave; if (fpux) { - XSTATE *src_xs, *dst_xs; + XSAVE_AREA_HEADER *xs;
memcpy( fpux, context->ExtendedRegisters, sizeof(*fpux) );
- if ((dst_xs = XState_sig(fpux)) && (src_xs = xstate_from_context( context ))) - { - memcpy( &dst_xs->YmmContext, &src_xs->YmmContext, sizeof(dst_xs->YmmContext) ); - dst_xs->Mask |= src_xs->Mask; - dst_xs->CompactionMask = xcontext->host_compaction_mask; - } + if (xstate_extended_features() && (xs = XState_sig(fpux))) + xs->CompactionMask = xcontext->host_compaction_mask; } if (!fpu && !fpux) restore_fpu( context ); } @@ -1458,8 +1452,10 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr, EXCEPTION_RECORD *rec, struct xcontext *xcontext ) { CONTEXT *context = &xcontext->c; - XSTATE *src_xs; + XSAVE_AREA_HEADER *src_xs; struct exc_stack_layout *stack; + size_t stack_size; + unsigned int xstate_size; NTSTATUS status = send_debug_event( rec, context, TRUE );
if (status == DBG_CONTINUE || status == DBG_EXCEPTION_HANDLED) @@ -1471,7 +1467,9 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr, /* fix up instruction pointer in context for EXCEPTION_BREAKPOINT */ if (rec->ExceptionCode == EXCEPTION_BREAKPOINT) context->Eip--;
- stack = virtual_setup_exception( stack_ptr, sizeof(*stack), rec ); + xstate_size = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; + stack_size = (ULONG_PTR)stack_ptr - (((ULONG_PTR)stack_ptr - sizeof(*stack) - xstate_size) & ~(ULONG_PTR)63); + stack = virtual_setup_exception( stack_ptr, stack_size, rec ); stack->rec_ptr = &stack->rec; stack->context_ptr = &stack->context; stack->rec = *rec; @@ -1479,16 +1477,13 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr,
if ((src_xs = xstate_from_context( context ))) { - XSTATE *dst_xs = (XSTATE *)(((ULONG_PTR)stack->xstate + 63) & ~63); + XSAVE_AREA_HEADER *dst_xs = (XSAVE_AREA_HEADER *)(stack + 1);
+ assert( !((ULONG_PTR)dst_xs & 63) ); context_init_xstate( &stack->context, dst_xs ); - memset( dst_xs, 0, sizeof(XSAVE_AREA_HEADER) ); + memset( dst_xs, 0, sizeof(*dst_xs) ); dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; - if (src_xs->Mask & 4) - { - dst_xs->Mask = 4; - memcpy( &dst_xs->YmmContext, &src_xs->YmmContext, sizeof(dst_xs->YmmContext) ); - } + copy_xstate( dst_xs, src_xs, src_xs->Mask ); } else { @@ -1569,11 +1564,14 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context { struct syscall_frame *frame = x86_thread_data()->syscall_frame; ULONG esp = (frame->esp - sizeof(struct exc_stack_layout)) & ~3; - struct exc_stack_layout *stack = (struct exc_stack_layout *)esp; - XSTATE *src_xs; + struct exc_stack_layout *stack; + XSAVE_AREA_HEADER *src_xs; + unsigned int xstate_size;
if (rec->ExceptionCode == EXCEPTION_BREAKPOINT) context->Eip--;
+ xstate_size = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; + stack = (struct exc_stack_layout *)((esp - sizeof(*stack) - xstate_size) & ~(ULONG_PTR)63); stack->rec_ptr = &stack->rec; stack->context_ptr = &stack->context; stack->rec = *rec; @@ -1581,16 +1579,13 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context
if ((src_xs = xstate_from_context( context ))) { - XSTATE *dst_xs = (XSTATE *)(((ULONG_PTR)stack->xstate + 63) & ~63); + XSAVE_AREA_HEADER *dst_xs = (XSAVE_AREA_HEADER *)(stack + 1);
context_init_xstate( &stack->context, dst_xs ); - memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); + assert( !((ULONG_PTR)dst_xs & 63) ); + memset( dst_xs, 0, sizeof(*dst_xs) ); dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; - if (src_xs->Mask & 4) - { - dst_xs->Mask = 4; - memcpy( &dst_xs->YmmContext, &src_xs->YmmContext, sizeof(dst_xs->YmmContext) ); - } + copy_xstate( dst_xs, src_xs, src_xs->Mask ); } else { diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index cf85db0d627..501ae14cae5 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -140,7 +140,7 @@ __ASM_GLOBAL_FUNC( alloc_fs_sel, #define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) #define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) #define FPU_sig(context) ((XMM_SAVE_AREA32 *)((context)->uc_mcontext.fpregs)) -#define XState_sig(fpu) (((unsigned int *)fpu->Reserved4)[12] == FP_XSTATE_MAGIC1 ? (XSTATE *)(fpu + 1) : NULL) +#define XState_sig(fpu) (((unsigned int *)fpu->Reserved4)[12] == FP_XSTATE_MAGIC1 ? (XSAVE_AREA_HEADER *)(fpu + 1) : NULL)
#elif defined(__FreeBSD__) || defined (__FreeBSD_kernel__)
@@ -359,11 +359,10 @@ struct exc_stack_layout ULONG64 align; /* 588 */ struct machine_frame machine_frame; /* 590 */ ULONG64 align2; /* 5b8 */ - XSTATE xstate; /* 5c0 */ }; C_ASSERT( offsetof(struct exc_stack_layout, rec) == 0x4f0 ); C_ASSERT( offsetof(struct exc_stack_layout, machine_frame) == 0x590 ); -C_ASSERT( sizeof(struct exc_stack_layout) == 0x700 ); +C_ASSERT( sizeof(struct exc_stack_layout) == 0x5c0 );
/* stack layout when calling KiUserApcDispatcher */ struct apc_stack_layout @@ -483,12 +482,12 @@ struct xcontext ULONG64 host_compaction_mask; };
-static inline XSTATE *xstate_from_context( const CONTEXT *context ) +static inline XSAVE_AREA_HEADER *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1);
if ((context->ContextFlags & CONTEXT_XSTATE) != CONTEXT_XSTATE) return NULL; - return (XSTATE *)((char *)xctx + xctx->XState.Offset); + return (XSAVE_AREA_HEADER *)((char *)xctx + xctx->XState.Offset); }
static inline void context_init_xstate( CONTEXT *context, void *xstate_buffer ) @@ -501,7 +500,7 @@ static inline void context_init_xstate( CONTEXT *context, void *xstate_buffer )
if (xstate_buffer) { - xctx->XState.Length = sizeof(XSTATE); + xctx->XState.Length = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; xctx->XState.Offset = (BYTE *)xstate_buffer - (BYTE *)xctx; context->ContextFlags |= CONTEXT_XSTATE;
@@ -897,7 +896,7 @@ static void save_context( struct xcontext *xcontext, const ucontext_t *sigcontex context->Dr7 = amd64_thread_data()->dr7; if (FPU_sig(sigcontext)) { - XSTATE *xs; + XSAVE_AREA_HEADER *xs;
context->ContextFlags |= CONTEXT_FLOATING_POINT; context->FltSave = *FPU_sig(sigcontext); @@ -922,7 +921,7 @@ static void save_context( struct xcontext *xcontext, const ucontext_t *sigcontex static void restore_context( const struct xcontext *xcontext, ucontext_t *sigcontext ) { const CONTEXT *context = &xcontext->c; - XSTATE *xs; + XSAVE_AREA_HEADER *xs;
amd64_thread_data()->dr0 = context->Dr0; amd64_thread_data()->dr1 = context->Dr1; @@ -1398,7 +1397,8 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec struct exc_stack_layout *stack; size_t stack_size; NTSTATUS status; - XSTATE *src_xs; + XSAVE_AREA_HEADER *src_xs; + unsigned int xstate_size;
if (rec->ExceptionCode == EXCEPTION_SINGLE_STEP) { @@ -1427,7 +1427,8 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec /* fix up instruction pointer in context for EXCEPTION_BREAKPOINT */ if (rec->ExceptionCode == EXCEPTION_BREAKPOINT) context->Rip--;
- stack_size = (ULONG_PTR)stack_ptr - (((ULONG_PTR)stack_ptr - sizeof(*stack)) & ~(ULONG_PTR)63); + xstate_size = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; + stack_size = (ULONG_PTR)stack_ptr - (((ULONG_PTR)stack_ptr - sizeof(*stack) - xstate_size) & ~(ULONG_PTR)63); stack = virtual_setup_exception( stack_ptr, stack_size, rec ); stack->rec = *rec; stack->context = *context; @@ -1436,15 +1437,12 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec
if ((src_xs = xstate_from_context( context ))) { - assert( !((ULONG_PTR)&stack->xstate & 63) ); - context_init_xstate( &stack->context, &stack->xstate ); - memset( &stack->xstate, 0, offsetof(XSTATE, YmmContext) ); - stack->xstate.CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; - if (src_xs->Mask & 4) - { - stack->xstate.Mask = 4; - memcpy( &stack->xstate.YmmContext, &src_xs->YmmContext, sizeof(stack->xstate.YmmContext) ); - } + XSAVE_AREA_HEADER *dst_xs = (XSAVE_AREA_HEADER *)(stack + 1); + assert( !((ULONG_PTR)dst_xs & 63) ); + context_init_xstate( &stack->context, dst_xs ); + memset( dst_xs, 0, sizeof(*dst_xs) ); + dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; + copy_xstate( dst_xs, src_xs, src_xs->Mask ); } else { @@ -1531,16 +1529,19 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context struct syscall_frame *frame = amd64_thread_data()->syscall_frame; struct exc_stack_layout *stack; NTSTATUS status = NtSetContextThread( GetCurrentThread(), context ); + unsigned int xstate_size;
if (status) return status; - stack = (struct exc_stack_layout *)((context->Rsp - sizeof(*stack)) & ~(ULONG_PTR)63); + xstate_size = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; + stack = (struct exc_stack_layout *)((context->Rsp - sizeof(*stack) - xstate_size) & ~(ULONG_PTR)63); memmove( &stack->context, context, sizeof(*context) );
if ((context->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE) { - assert( !((ULONG_PTR)&stack->xstate & 63) ); - context_init_xstate( &stack->context, &stack->xstate ); - memcpy( &stack->xstate, &frame->xstate, sizeof(XSAVE_AREA_HEADER) + xstate_features_size ); + XSAVE_AREA_HEADER *dst_xs = (XSAVE_AREA_HEADER *)(stack + 1); + assert( !((ULONG_PTR)dst_xs & 63) ); + context_init_xstate( &stack->context, dst_xs ); + memcpy( dst_xs, &frame->xstate, sizeof(XSAVE_AREA_HEADER) + xstate_features_size ); } else context_init_xstate( &stack->context, NULL );
v4: - avoid direct TEB access for macOS.