This is a preparation for supporting more xstate features in the contexts.
The remaining parts are: - support arbitrary xstate in usr1_handler(); - support arbitrary xstate in the server contexts (probably transferring only the present part); - support arbitrary xstate in context manipulation functions; - enabling more xstates in system.c and in user shared data.
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 61 +++++++++++++++++---------------- dlls/ntdll/unix/signal_x86_64.c | 45 ++++++++++++------------ dlls/ntdll/unix/system.c | 4 +++ dlls/ntdll/unix/unix_private.h | 9 +++++ 4 files changed, 68 insertions(+), 51 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index cd6417b57b4..d634e46a80f 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -525,6 +525,7 @@ struct x86_thread_data UINT dr7; /* 1f0 */ SYSTEM_SERVICE_TABLE *syscall_table; /* 1f4 syscall table */ struct syscall_frame *syscall_frame; /* 1f8 frame pointer on syscall entry */ + UINT64 xstate_features_mask; /* 1fc */ };
C_ASSERT( sizeof(struct x86_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); @@ -609,8 +610,6 @@ struct xcontext ULONG64 host_compaction_mask; };
-extern BOOL xstate_compaction_enabled; - static inline XSTATE *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1); @@ -832,7 +831,7 @@ static inline void save_context( struct xcontext *xcontext, const ucontext_t *si context->ContextFlags |= CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memcpy( context->ExtendedRegisters, fpux, sizeof(*fpux) ); if (!fpu) fpux_to_fpu( &context->FloatSave, fpux ); - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(fpux))) + if (xstate_extended_features() && (xs = XState_sig(fpux))) { context_init_xstate( context, xs ); xcontext->host_compaction_mask = xs->CompactionMask; @@ -936,7 +935,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) DWORD flags = context->ContextFlags & ~CONTEXT_i386; BOOL self = (handle == GetCurrentThread());
- if ((flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -944,7 +943,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & XSTATE_MASK_GSSE) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1138,7 +1137,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
context->ContextFlags |= CONTEXT_EXTENDED_REGISTERS; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1148,7 +1147,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER;
- mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1485,7 +1484,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr,
context_init_xstate( &stack->context, dst_xs ); memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); - dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { dst_xs->Mask = 4; @@ -1587,7 +1586,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context
context_init_xstate( &stack->context, dst_xs ); memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); - dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { dst_xs->Mask = 4; @@ -2481,6 +2480,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB ldt_set_fs( thread_data->fs, teb ); thread_data->gs = get_gs(); thread_data->syscall_table = KeServiceDescriptorTable; + thread_data->xstate_features_mask = xstate_supported_features_mask;
context.SegCs = get_cs(); context.SegDs = get_ds(); @@ -2504,6 +2504,8 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB *ctx = context; ctx->ContextFlags = CONTEXT_FULL | CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memset( frame, 0, sizeof(*frame) ); + if (xstate_compaction_enabled) + frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx );
stack = (DWORD *)ctx; @@ -2605,26 +2607,27 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "addl %fs:0x1f4,%ebx\n\t" /* x86_thread_data()->syscall_table */ "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ "jz 2f\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" - "movl %edx,0x240(%ecx)\n\t" - "movl %edx,0x244(%ecx)\n\t" - "movl %edx,0x248(%ecx)\n\t" - "movl %edx,0x24c(%ecx)\n\t" - "movl %edx,0x250(%ecx)\n\t" - "movl %edx,0x254(%ecx)\n\t" + "movl %fs:0x1fc,%eax\n\t" /* x86_thread_data()->xstate_features_mask */ + "movl %fs:0x200,%edx\n\t" /* x86_thread_data()->xstate_features_mask high dword */ + "xorl %edi,%edi\n\t" + "movl %edi,0x240(%ecx)\n\t" + "movl %edi,0x244(%ecx)\n\t" + "movl %edi,0x248(%ecx)\n\t" + "movl %edi,0x24c(%ecx)\n\t" + "movl %edi,0x250(%ecx)\n\t" + "movl %edi,0x254(%ecx)\n\t" "testl $2,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" - "movl %edx,0x258(%ecx)\n\t" - "movl %edx,0x25c(%ecx)\n\t" - "movl %edx,0x260(%ecx)\n\t" - "movl %edx,0x264(%ecx)\n\t" - "movl %edx,0x268(%ecx)\n\t" - "movl %edx,0x26c(%ecx)\n\t" - "movl %edx,0x270(%ecx)\n\t" - "movl %edx,0x274(%ecx)\n\t" - "movl %edx,0x278(%ecx)\n\t" - "movl %edx,0x27c(%ecx)\n\t" + "movl %edi,0x258(%ecx)\n\t" + "movl %edi,0x25c(%ecx)\n\t" + "movl %edi,0x260(%ecx)\n\t" + "movl %edi,0x264(%ecx)\n\t" + "movl %edi,0x268(%ecx)\n\t" + "movl %edi,0x26c(%ecx)\n\t" + "movl %edi,0x270(%ecx)\n\t" + "movl %edi,0x274(%ecx)\n\t" + "movl %edi,0x278(%ecx)\n\t" + "movl %edi,0x27c(%ecx)\n\t" /* The xsavec instruction is not supported by * binutils < 2.25. */ ".byte 0x0f, 0xc7, 0x61, 0x40\n\t" /* xsavec 0x40(%ecx) */ @@ -2669,8 +2672,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "testl $3,%ecx\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" "movl %eax,%esi\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" + "movl %fs:0x1fc,%eax\n\t" /* x86_thread_data()->xstate_features_mask */ + "movl %fs:0x200,%edx\n\t" /* x86_thread_data()->xstate_features_mask high dword */ "xrstor 0x40(%esp)\n\t" "movl %esi,%eax\n\t" "jmp 3f\n" diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 405ac49e5a3..7d6edffbc36 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -477,8 +477,6 @@ struct xcontext ULONG64 host_compaction_mask; };
-extern BOOL xstate_compaction_enabled; - static inline XSTATE *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1); @@ -898,7 +896,7 @@ static void save_context( struct xcontext *xcontext, const ucontext_t *sigcontex context->ContextFlags |= CONTEXT_FLOATING_POINT; context->FltSave = *FPU_sig(sigcontext); context->MxCsr = context->FltSave.MxCsr; - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + if (xstate_extended_features() && (xs = XState_sig(FPU_sig(sigcontext)))) { /* xcontext and sigcontext are both on the signal stack, so we can * just reference sigcontext without overflowing 32 bit XState.Offset */ @@ -928,7 +926,7 @@ static void restore_context( const struct xcontext *xcontext, ucontext_t *sigcon amd64_thread_data()->dr7 = context->Dr7; set_sigcontext( context, sigcontext ); if (FPU_sig(sigcontext)) *FPU_sig(sigcontext) = context->FltSave; - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + if (xstate_extended_features() && (xs = XState_sig(FPU_sig(sigcontext)))) xs->CompactionMask = xcontext->host_compaction_mask; leave_handler( sigcontext ); } @@ -977,7 +975,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) BOOL self = (handle == GetCurrentThread()); struct syscall_frame *frame = amd64_thread_data()->syscall_frame;
- if ((flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -985,7 +983,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & XSTATE_MASK_GSSE) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1155,7 +1153,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->MxCsr = context->FltSave.MxCsr; context->ContextFlags |= CONTEXT_FLOATING_POINT; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1165,7 +1163,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER;
- mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1377,7 +1375,7 @@ NTSTATUS get_thread_wow64_context( HANDLE handle, void *ctx, ULONG size ) context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER;
- mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1442,7 +1440,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec assert( !((ULONG_PTR)&stack->xstate & 63) ); context_init_xstate( &stack->context, &stack->xstate ); memset( &stack->xstate, 0, offsetof(XSTATE, YmmContext) ); - stack->xstate.CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + stack->xstate.CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { stack->xstate.Mask = 4; @@ -2539,6 +2537,8 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB *ctx = context; ctx->ContextFlags = CONTEXT_FULL; memset( frame, 0, sizeof(*frame) ); + if (xstate_compaction_enabled) + frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx );
frame->cs = cs64_sel; @@ -2636,18 +2636,19 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 2f\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" - "movq %rdx,0x2c0(%rcx)\n\t" - "movq %rdx,0x2c8(%rcx)\n\t" - "movq %rdx,0x2d0(%rcx)\n\t" + "movl " __ASM_NAME("xstate_supported_features_mask") "(%rip),%eax\n\t" + "movl 4 + " __ASM_NAME("xstate_supported_features_mask") "(%rip),%edx\n\t" + "xorq %rbp,%rbp\n\t" + "movq %rbp,0x2c0(%rcx)\n\t" + "movq %rbp,0x2c8(%rcx)\n\t" + "movq %rbp,0x2d0(%rcx)\n\t" "testl $2,%r14d\n\t" /* SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" - "movq %rdx,0x2d8(%rcx)\n\t" - "movq %rdx,0x2e0(%rcx)\n\t" - "movq %rdx,0x2e8(%rcx)\n\t" - "movq %rdx,0x2f0(%rcx)\n\t" - "movq %rdx,0x2f8(%rcx)\n\t" + "movq %rbp,0x2d8(%rcx)\n\t" + "movq %rbp,0x2e0(%rcx)\n\t" + "movq %rbp,0x2e8(%rcx)\n\t" + "movq %rbp,0x2f0(%rcx)\n\t" + "movq %rbp,0x2f8(%rcx)\n\t" /* The xsavec instruction is not supported by * binutils < 2.25. */ ".byte 0x48, 0x0f, 0xc7, 0xa1, 0xc0, 0x00, 0x00, 0x00\n\t" /* xsavec64 0xc0(%rcx) */ @@ -2749,8 +2750,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "2:\ttestl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 3f\n\t" "movq %rax,%r11\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" + "movl " __ASM_NAME("xstate_supported_features_mask") "(%rip),%eax\n\t" + "movl 4 + " __ASM_NAME("xstate_supported_features_mask") "(%rip),%edx\n\t" "xrstor64 0xc0(%rcx)\n\t" "movq %r11,%rax\n\t" "movl 0xb4(%rcx),%edx\n\t" /* frame->restore_flags */ diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index d2df1dd50c2..c3ffdc6921e 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -247,6 +247,7 @@ static pthread_mutex_t timezone_mutex = PTHREAD_MUTEX_INITIALIZER; #if defined(__i386__) || defined(__x86_64__)
BOOL xstate_compaction_enabled = FALSE; +UINT64 xstate_supported_features_mask;
#define AUTH 0x68747541 /* "Auth" */ #define ENTI 0x69746e65 /* "enti" */ @@ -396,6 +397,9 @@ static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) { do_cpuid( 0x0000000d, 1, regs3 ); /* get XSAVE details */ if (regs3[0] & 2) xstate_compaction_enabled = TRUE; + xstate_supported_features_mask = 3; + if (features & CPU_FEATURE_AVX) + xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX; }
if (regs[1] == AUTH && regs[3] == ENTI && regs[2] == CAMD) diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index 07f2724eac7..75c81b21f84 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -212,6 +212,15 @@ extern int server_pipe( int fd[2] );
extern void fpux_to_fpu( I386_FLOATING_SAVE_AREA *fpu, const XSAVE_FORMAT *fpux ); extern void fpu_to_fpux( XSAVE_FORMAT *fpux, const I386_FLOATING_SAVE_AREA *fpu ); + +extern BOOL xstate_compaction_enabled; +extern UINT64 xstate_supported_features_mask; + +static inline UINT64 xstate_extended_features(void) +{ + return xstate_supported_features_mask & ~(UINT64)3; +} + extern void *get_cpu_area( USHORT machine ); extern void set_thread_id( TEB *teb, DWORD pid, DWORD tid ); extern NTSTATUS init_thread_stack( TEB *teb, ULONG_PTR limit, SIZE_T reserve_size, SIZE_T commit_size );
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 62 +++++++++++----------- dlls/ntdll/unix/signal_x86_64.c | 92 ++++++++++++++++----------------- dlls/ntdll/unix/system.c | 19 +++++++ dlls/ntdll/unix/unix_private.h | 3 ++ include/winnt.h | 8 +++ 5 files changed, 106 insertions(+), 78 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index d634e46a80f..afcac44fa50 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -508,10 +508,10 @@ struct syscall_frame /* Leave space for the whole set of YMM registers. They're not used in * 32-bit mode, but some processors fault if they're not in writable memory. */ - DECLSPEC_ALIGN(64) XSTATE xstate; /* 240 */ + DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate_hdr; /* 240 */ };
-C_ASSERT( sizeof(struct syscall_frame) == 0x380 ); +C_ASSERT( sizeof(struct syscall_frame) == 0x280 );
struct x86_thread_data { @@ -526,12 +526,14 @@ struct x86_thread_data SYSTEM_SERVICE_TABLE *syscall_table; /* 1f4 syscall table */ struct syscall_frame *syscall_frame; /* 1f8 frame pointer on syscall entry */ UINT64 xstate_features_mask; /* 1fc */ + UINT xstate_features_size; /* 204 */ };
C_ASSERT( sizeof(struct x86_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, gs ) == 0x1d8 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, syscall_table ) == 0x1f4 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, syscall_frame ) == 0x1f8 ); +C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, xstate_features_size ) == 0x204 );
/* flags to control the behavior of the syscall dispatcher */ #define SYSCALL_HAVE_XSAVE 1 @@ -938,12 +940,12 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < xstate_get_size( xs, xs->Mask ))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1002,7 +1004,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) memcpy( &frame->u.xsave, context->ExtendedRegisters, sizeof(frame->u.xsave) ); /* reset the current interrupt status */ frame->u.xsave.StatusWord &= frame->u.xsave.ControlWord | 0xff80; - frame->xstate.Mask |= XSTATE_MASK_LEGACY; + frame->xstate_hdr.Mask |= XSTATE_MASK_LEGACY; } else if (flags & CONTEXT_FLOATING_POINT) { @@ -1014,19 +1016,14 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) { frame->u.fsave = context->FloatSave; } - frame->xstate.Mask |= XSTATE_MASK_LEGACY_FLOATING_POINT; + frame->xstate_hdr.Mask |= XSTATE_MASK_LEGACY_FLOATING_POINT; } if (flags & CONTEXT_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - frame->xstate.YmmContext = xs->YmmContext; - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate_hdr, xs, xs->Mask ); }
frame->restore_flags |= flags & ~CONTEXT_INTEGER; @@ -1093,7 +1090,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->FloatSave = frame->u.fsave; } else if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { fpux_to_fpu( &context->FloatSave, &frame->u.xsave ); } @@ -1109,7 +1106,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) XSAVE_FORMAT *xs = (XSAVE_FORMAT *)context->ExtendedRegisters;
if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( xs, &frame->u.xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( xs->FloatRegisters, frame->u.xsave.FloatRegisters, @@ -1122,7 +1119,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) xs->ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) + if (!xstate_compaction_enabled || (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( xs->XmmRegisters, frame->u.xsave.XmmRegisters, sizeof( xs->XmmRegisters )); xs->MxCsr = frame->u.xsave.MxCsr; @@ -1140,21 +1137,21 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) - || context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); - xstate->Mask = frame->xstate.Mask & mask; + xstate->Mask = frame->xstate_hdr.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - xstate->YmmContext = frame->xstate.YmmContext; + if (context_ex->XState.Length < xstate_get_size( xstate, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate_hdr, xstate->Mask ); } } /* update the cached version of the debug registers */ @@ -1483,7 +1480,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr, XSTATE *dst_xs = (XSTATE *)(((ULONG_PTR)stack->xstate + 63) & ~63);
context_init_xstate( &stack->context, dst_xs ); - memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); + memset( dst_xs, 0, sizeof(XSAVE_AREA_HEADER) ); dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { @@ -1623,7 +1620,8 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, __ASM_CFI(".cfi_rel_offset %edi,-12\n\t") "movl 0x18(%ebp),%edx\n\t" /* teb */ "pushl 0(%edx)\n\t" /* teb->Tib.ExceptionList */ - "subl $0x380,%esp\n\t" /* sizeof(struct syscall_frame) */ + "subl $0x280,%esp\n\t" /* sizeof(struct syscall_frame) */ + "subl %fs:0x204,%esp\n\t" /* x86_thread_data()->xstate_features_size */ "andl $~63,%esp\n\t" "leal 8(%ebp),%eax\n\t" "movl %eax,0x38(%esp)\n\t" /* frame->syscall_cfa */ @@ -2431,7 +2429,7 @@ void signal_init_process(void) struct sigaction sig_act; void *kernel_stack = (char *)ntdll_get_thread_data()->kernel_stack + kernel_stack_size;
- x86_thread_data()->syscall_frame = (struct syscall_frame *)kernel_stack - 1; + x86_thread_data()->syscall_frame = (struct syscall_frame *)((ULONG_PTR)((char *)kernel_stack - sizeof(struct syscall_frame) - xstate_features_size) & ~(ULONG_PTR)63);
if (cpu_info.ProcessorFeatureBits & CPU_FEATURE_FXSR) syscall_flags |= SYSCALL_HAVE_FXSAVE; if (cpu_info.ProcessorFeatureBits & CPU_FEATURE_XSAVE) syscall_flags |= SYSCALL_HAVE_XSAVE; @@ -2481,6 +2479,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB thread_data->gs = get_gs(); thread_data->syscall_table = KeServiceDescriptorTable; thread_data->xstate_features_mask = xstate_supported_features_mask; + thread_data->xstate_features_size = xstate_features_size;
context.SegCs = get_cs(); context.SegDs = get_ds(); @@ -2505,7 +2504,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB ctx->ContextFlags = CONTEXT_FULL | CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memset( frame, 0, sizeof(*frame) ); if (xstate_compaction_enabled) - frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; + frame->xstate_hdr.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx );
stack = (DWORD *)ctx; @@ -2547,7 +2546,8 @@ __ASM_GLOBAL_FUNC( signal_start_thread, "movl 0x1f8(%ecx),%eax\n\t" /* x86_thread_data()->syscall_frame */ "orl %eax,%eax\n\t" "jnz 1f\n\t" - "leal -0x380(%esp),%eax\n\t" /* sizeof(struct syscall_frame) */ + "leal -0x280(%esp),%eax\n\t" /* sizeof(struct syscall_frame) */ + "subl %fs:0x204,%eax\n\t" /* x86_thread_data()->xstate_features_size */ "andl $~63,%eax\n\t" "movl %eax,0x1f8(%ecx)\n" /* x86_thread_data()->syscall_frame */ /* switch to kernel stack */ diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 7d6edffbc36..efbabe7970f 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -424,10 +424,12 @@ struct syscall_frame DWORD restore_flags; /* 00b4 */ DWORD align[2]; /* 00b8 */ XMM_SAVE_AREA32 xsave; /* 00c0 */ - DECLSPEC_ALIGN(64) XSTATE xstate; /* 02c0 */ + DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate_hdr; /* 02c0 */ };
-C_ASSERT( sizeof( struct syscall_frame ) == 0x400); +C_ASSERT( offsetof( struct syscall_frame, xsave ) == 0xc0 ); +C_ASSERT( offsetof( struct syscall_frame, xstate_hdr ) == 0x2c0 ); +C_ASSERT( sizeof( struct syscall_frame ) == 0x300);
struct amd64_thread_data { @@ -978,12 +980,12 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < xstate_get_size( xs, xs->Mask ))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1043,19 +1045,14 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (flags & CONTEXT_FLOATING_POINT) { frame->xsave = context->FltSave; - frame->xstate.Mask |= XSTATE_MASK_LEGACY; + frame->xstate_hdr.Mask |= XSTATE_MASK_LEGACY; } if (flags & CONTEXT_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - memcpy( &frame->xstate.YmmContext, &xs->YmmContext, sizeof(xs->YmmContext) ); - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate_hdr, xs, xs->Mask ); }
frame->restore_flags |= flags & ~CONTEXT_INTEGER; @@ -1121,7 +1118,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if (needed_flags & CONTEXT_FLOATING_POINT) { if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( &context->FltSave, &frame->xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( context->FltSave.FloatRegisters, frame->xsave.FloatRegisters, @@ -1135,7 +1132,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->FltSave.ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) + if (!xstate_compaction_enabled || (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( context->FltSave.XmmRegisters, frame->xsave.XmmRegisters, sizeof( context->FltSave.XmmRegisters )); @@ -1156,21 +1153,21 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) - || context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); - xstate->Mask = frame->xstate.Mask & mask; + xstate->Mask = frame->xstate_hdr.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - memcpy( &xstate->YmmContext, &frame->xstate.YmmContext, sizeof(xstate->YmmContext) ); + if (context_ex->XState.Length < xstate_get_size( xstate, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate_hdr, xstate->Mask ); } } /* update the cached version of the debug registers */ @@ -1277,14 +1274,9 @@ NTSTATUS set_thread_wow64_context( HANDLE handle, const void *ctx, ULONG size ) if (flags & CONTEXT_I386_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - memcpy( &frame->xstate.YmmContext, &xs->YmmContext, sizeof(xs->YmmContext) ); - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate_hdr, xs, xs->Mask ); frame->restore_flags |= CONTEXT_XSTATE; } return STATUS_SUCCESS; @@ -1365,24 +1357,24 @@ NTSTATUS get_thread_wow64_context( HANDLE handle, void *ctx, ULONG size ) fpux_to_fpu( &context->FloatSave, &frame->xsave ); context->ContextFlags |= CONTEXT_I386_FLOATING_POINT; } - if ((needed_flags & CONTEXT_I386_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_I386_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); - xstate->Mask = frame->xstate.Mask & mask; + xstate->Mask = frame->xstate_hdr.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - memcpy( &xstate->YmmContext, &frame->xstate.YmmContext, sizeof(xstate->YmmContext) ); + if (context_ex->XState.Length < xstate_get_size( xstate, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate_hdr, xstate->Mask ); } } return STATUS_SUCCESS; @@ -1541,7 +1533,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context { assert( !((ULONG_PTR)&stack->xstate & 63) ); context_init_xstate( &stack->context, &stack->xstate ); - memcpy( &stack->xstate, &frame->xstate, sizeof(frame->xstate) ); + memcpy( &stack->xstate, &frame->xstate_hdr, sizeof(XSAVE_AREA_HEADER) + xstate_features_size ); } else context_init_xstate( &stack->context, NULL );
@@ -1583,13 +1575,15 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "fnstcw -0x2c(%rbp)\n\t" "movq %rsi,-0x38(%rbp)\n\t" /* ret_ptr */ "movq %rdx,-0x40(%rbp)\n\t" /* ret_len */ - "subq $0x408,%rsp\n\t" /* sizeof(struct syscall_frame) + exception */ + "subq $0x308,%rsp\n\t" /* sizeof(struct syscall_frame) + exception */ + "movq " __ASM_NAME("xstate_features_size") "(%rip),%rsi\n\t" + "subq %rsi,%rsp\n\t" "andq $~63,%rsp\n\t" "leaq 0x10(%rbp),%rax\n\t" "movq %rax,0xa8(%rsp)\n\t" /* frame->syscall_cfa */ "movq 0x328(%r8),%r10\n\t" /* amd64_thread_data()->syscall_frame */ "movq (%r8),%rax\n\t" /* NtCurrentTeb()->Tib.ExceptionList */ - "movq %rax,0x400(%rsp)\n\t" + "movq %rax,0x300(%rsp,%rsi)\n\t" "movl 0xb0(%r10),%r14d\n\t" /* prev_frame->syscall_flags */ "movl %r14d,0xb0(%rsp)\n\t" /* frame->syscall_flags */ "movq %r10,0xa0(%rsp)\n\t" /* frame->prev_frame */ @@ -1622,7 +1616,8 @@ __ASM_GLOBAL_FUNC( user_mode_callback_return, __ASM_CFI(".cfi_rel_offset %r13,-0x18\n\t") __ASM_CFI(".cfi_rel_offset %r14,-0x20\n\t") __ASM_CFI(".cfi_rel_offset %r15,-0x28\n\t") - "movq 0x400(%r10),%rax\n\t" /* exception list */ + "movq " __ASM_NAME("xstate_features_size") "(%rip),%rax\n\t" + "movq 0x300(%r10,%rax),%rax\n\t" /* exception list */ "movq %rax,0(%rcx)\n\t" /* teb->Tib.ExceptionList */ "movq -0x38(%rbp),%r10\n\t" /* ret_ptr */ "movq -0x40(%rbp),%r11\n\t" /* ret_len */ @@ -2386,7 +2381,9 @@ void signal_init_process(void) WOW_TEB *wow_teb = get_wow_teb( NtCurrentTeb() ); void *ptr, *kernel_stack = (char *)ntdll_get_thread_data()->kernel_stack + kernel_stack_size;
- amd64_thread_data()->syscall_frame = (struct syscall_frame *)kernel_stack - 1; + amd64_thread_data()->syscall_frame = (struct syscall_frame *)((ULONG_PTR)((char *)kernel_stack - sizeof(struct syscall_frame) - xstate_features_size) & ~(ULONG_PTR)63); + + TRACE("xstate_features_size %d, kernel_stack %p, &xstate_hdr %p.\n", (int)xstate_features_size, kernel_stack, &amd64_thread_data()->syscall_frame->xstate_hdr);
/* sneak in a syscall dispatcher pointer at a fixed address (7ffe1000) */ ptr = (char *)user_shared_data + page_size; @@ -2538,7 +2535,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB ctx->ContextFlags = CONTEXT_FULL; memset( frame, 0, sizeof(*frame) ); if (xstate_compaction_enabled) - frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; + frame->xstate_hdr.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx );
frame->cs = cs64_sel; @@ -2581,7 +2578,8 @@ __ASM_GLOBAL_FUNC( signal_start_thread, "movq 0x328(%rcx),%r8\n\t" /* amd64_thread_data()->syscall_frame */ "orq %r8,%r8\n\t" "jnz 1f\n\t" - "leaq -0x400(%rsp),%r8\n\t" /* sizeof(struct syscall_frame) */ + "leaq -0x300(%rsp),%r8\n\t" /* sizeof(struct syscall_frame) */ + "subq " __ASM_NAME("xstate_features_size") "(%rip),%r8\n\t" "andq $~63,%r8\n\t" "movq %r8,0x328(%rcx)\n" /* amd64_thread_data()->syscall_frame */ /* switch to kernel stack */ diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index c3ffdc6921e..6bed29104d6 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -248,6 +248,23 @@ static pthread_mutex_t timezone_mutex = PTHREAD_MUTEX_INITIALIZER;
BOOL xstate_compaction_enabled = FALSE; UINT64 xstate_supported_features_mask; +UINT64 xstate_features_size; + +unsigned int xstate_get_size( XSAVE_AREA_HEADER *xsave, UINT64 mask ) +{ + if (!(mask & ((UINT64)1 << XSTATE_AVX))) return sizeof(XSAVE_AREA_HEADER); + return sizeof(XSAVE_AREA_HEADER) + sizeof(YMMCONTEXT); +} + +void copy_xstate( XSAVE_AREA_HEADER *dst, XSAVE_AREA_HEADER *src, UINT64 mask ) +{ + mask &= xstate_extended_features() & src->Mask; + if (src->CompactionMask) mask &= src->CompactionMask; + if (dst->CompactionMask) mask &= dst->CompactionMask; + dst->Mask = (dst->Mask & ~xstate_extended_features()) | mask; + if (mask & ((UINT64)1 << XSTATE_AVX)) + *(YMMCONTEXT *)(dst + 1) = *(YMMCONTEXT *)(src + 1); +}
#define AUTH 0x68747541 /* "Auth" */ #define ENTI 0x69746e65 /* "enti" */ @@ -400,6 +417,8 @@ static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) xstate_supported_features_mask = 3; if (features & CPU_FEATURE_AVX) xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX; + xstate_features_size = xstate_get_size( NULL, xstate_supported_features_mask ) - sizeof(XSAVE_AREA_HEADER); + xstate_features_size = (xstate_features_size + 15) & ~15; }
if (regs[1] == AUTH && regs[3] == ENTI && regs[2] == CAMD) diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index 75c81b21f84..5e7509d6d49 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -215,6 +215,9 @@ extern void fpu_to_fpux( XSAVE_FORMAT *fpux, const I386_FLOATING_SAVE_AREA *fpu
extern BOOL xstate_compaction_enabled; extern UINT64 xstate_supported_features_mask; +extern UINT64 xstate_features_size; +extern unsigned int xstate_get_size( XSAVE_AREA_HEADER *xsave, UINT64 mask ); +extern void copy_xstate( XSAVE_AREA_HEADER *dst, XSAVE_AREA_HEADER *src, UINT64 mask );
static inline UINT64 xstate_extended_features(void) { diff --git a/include/winnt.h b/include/winnt.h index e1de2f29ecd..3674c777e97 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -1470,6 +1470,14 @@ typedef struct _XSTATE_CONFIGURATION ULONG64 EnabledUserVisibleSupervisorFeatures; } XSTATE_CONFIGURATION, *PXSTATE_CONFIGURATION;
+typedef struct _XSAVE_AREA_HEADER +{ + DWORD64 Mask; + DWORD64 CompactionMask; + DWORD64 Reserved2[6]; +} +XSAVE_AREA_HEADER, *PXSAVE_AREA_HEADER; + typedef struct _YMMCONTEXT { M128A Ymm0;
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 63 +++++++++++++++------------------ dlls/ntdll/unix/signal_x86_64.c | 47 ++++++++++++------------ 2 files changed, 53 insertions(+), 57 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index afcac44fa50..dda8d1052c6 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -151,7 +151,7 @@ typedef struct ucontext
#define FPU_sig(context) ((FLOATING_SAVE_AREA*)((context)->uc_mcontext.fpregs)) #define FPUX_sig(context) (FPU_sig(context) && !((context)->uc_mcontext.fpregs->status >> 16) ? (XSAVE_FORMAT *)(FPU_sig(context) + 1) : NULL) -#define XState_sig(fpu) (((unsigned int *)fpu->Reserved4)[12] == FP_XSTATE_MAGIC1 ? (XSTATE *)(fpu + 1) : NULL) +#define XState_sig(fpu) (((unsigned int *)fpu->Reserved4)[12] == FP_XSTATE_MAGIC1 ? (XSAVE_AREA_HEADER *)(fpu + 1) : NULL)
#ifdef __ANDROID__ /* custom signal restorer since we may have unmapped the one in vdso, and bionic doesn't check for that */ @@ -443,12 +443,10 @@ struct exc_stack_layout EXCEPTION_RECORD rec; /* 008 */ CONTEXT context; /* 058 */ CONTEXT_EX context_ex; /* 324 */ - BYTE xstate[sizeof(XSTATE)+64]; /* 33c extra space to allow for 64-byte alignment */ - DWORD align; /* 4bc */ + DWORD align; /* 33c */ }; C_ASSERT( offsetof(struct exc_stack_layout, context) == 0x58 ); -C_ASSERT( offsetof(struct exc_stack_layout, xstate) == 0x33c ); -C_ASSERT( sizeof(struct exc_stack_layout) == 0x4c0 ); +C_ASSERT( sizeof(struct exc_stack_layout) == 0x340 );
/* stack layout when calling KiUserApcDispatcher */ struct apc_stack_layout @@ -612,12 +610,12 @@ struct xcontext ULONG64 host_compaction_mask; };
-static inline XSTATE *xstate_from_context( const CONTEXT *context ) +static inline XSAVE_AREA_HEADER *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1);
if ((context->ContextFlags & CONTEXT_XSTATE) != CONTEXT_XSTATE) return NULL; - return (XSTATE *)((char *)xctx + xctx->XState.Offset); + return (XSAVE_AREA_HEADER *)((char *)xctx + xctx->XState.Offset); }
static inline void context_init_xstate( CONTEXT *context, void *xstate_buffer ) @@ -630,7 +628,7 @@ static inline void context_init_xstate( CONTEXT *context, void *xstate_buffer )
if (xstate_buffer) { - xctx->XState.Length = sizeof(XSTATE); + xctx->XState.Length = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; xctx->XState.Offset = (BYTE *)xstate_buffer - (BYTE *)xctx; context->ContextFlags |= CONTEXT_XSTATE;
@@ -828,7 +826,7 @@ static inline void save_context( struct xcontext *xcontext, const ucontext_t *si } if (fpux) { - XSTATE *xs; + XSAVE_AREA_HEADER *xs;
context->ContextFlags |= CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memcpy( context->ExtendedRegisters, fpux, sizeof(*fpux) ); @@ -880,16 +878,12 @@ static inline void restore_context( const struct xcontext *xcontext, ucontext_t if (fpu) *fpu = context->FloatSave; if (fpux) { - XSTATE *src_xs, *dst_xs; + XSAVE_AREA_HEADER *xs;
memcpy( fpux, context->ExtendedRegisters, sizeof(*fpux) );
- if ((dst_xs = XState_sig(fpux)) && (src_xs = xstate_from_context( context ))) - { - memcpy( &dst_xs->YmmContext, &src_xs->YmmContext, sizeof(dst_xs->YmmContext) ); - dst_xs->Mask |= src_xs->Mask; - dst_xs->CompactionMask = xcontext->host_compaction_mask; - } + if (xstate_extended_features() && (xs = XState_sig(fpux))) + xs->CompactionMask = xcontext->host_compaction_mask; } if (!fpu && !fpux) restore_fpu( context ); } @@ -1456,8 +1450,10 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr, EXCEPTION_RECORD *rec, struct xcontext *xcontext ) { CONTEXT *context = &xcontext->c; - XSTATE *src_xs; + XSAVE_AREA_HEADER *src_xs; struct exc_stack_layout *stack; + size_t stack_size; + unsigned int xstate_size; NTSTATUS status = send_debug_event( rec, context, TRUE );
if (status == DBG_CONTINUE || status == DBG_EXCEPTION_HANDLED) @@ -1469,7 +1465,9 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr, /* fix up instruction pointer in context for EXCEPTION_BREAKPOINT */ if (rec->ExceptionCode == EXCEPTION_BREAKPOINT) context->Eip--;
- stack = virtual_setup_exception( stack_ptr, sizeof(*stack), rec ); + xstate_size = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; + stack_size = (ULONG_PTR)stack_ptr - (((ULONG_PTR)stack_ptr - sizeof(*stack) - xstate_size) & ~(ULONG_PTR)63); + stack = virtual_setup_exception( stack_ptr, stack_size, rec ); stack->rec_ptr = &stack->rec; stack->context_ptr = &stack->context; stack->rec = *rec; @@ -1477,16 +1475,13 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr,
if ((src_xs = xstate_from_context( context ))) { - XSTATE *dst_xs = (XSTATE *)(((ULONG_PTR)stack->xstate + 63) & ~63); + XSAVE_AREA_HEADER *dst_xs = (XSAVE_AREA_HEADER *)(stack + 1);
+ assert( !((ULONG_PTR)dst_xs & 63) ); context_init_xstate( &stack->context, dst_xs ); - memset( dst_xs, 0, sizeof(XSAVE_AREA_HEADER) ); + memset( dst_xs, 0, sizeof(*dst_xs) ); dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; - if (src_xs->Mask & 4) - { - dst_xs->Mask = 4; - memcpy( &dst_xs->YmmContext, &src_xs->YmmContext, sizeof(dst_xs->YmmContext) ); - } + copy_xstate( dst_xs, src_xs, src_xs->Mask ); } else { @@ -1567,11 +1562,14 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context { struct syscall_frame *frame = x86_thread_data()->syscall_frame; ULONG esp = (frame->esp - sizeof(struct exc_stack_layout)) & ~3; - struct exc_stack_layout *stack = (struct exc_stack_layout *)esp; - XSTATE *src_xs; + struct exc_stack_layout *stack; + XSAVE_AREA_HEADER *src_xs; + unsigned int xstate_size;
if (rec->ExceptionCode == EXCEPTION_BREAKPOINT) context->Eip--;
+ xstate_size = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; + stack = (struct exc_stack_layout *)((esp - sizeof(*stack) - xstate_size) & ~(ULONG_PTR)63); stack->rec_ptr = &stack->rec; stack->context_ptr = &stack->context; stack->rec = *rec; @@ -1579,16 +1577,13 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context
if ((src_xs = xstate_from_context( context ))) { - XSTATE *dst_xs = (XSTATE *)(((ULONG_PTR)stack->xstate + 63) & ~63); + XSAVE_AREA_HEADER *dst_xs = (XSAVE_AREA_HEADER *)(stack + 1);
context_init_xstate( &stack->context, dst_xs ); - memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); + assert( !((ULONG_PTR)dst_xs & 63) ); + memset( dst_xs, 0, sizeof(*dst_xs) ); dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; - if (src_xs->Mask & 4) - { - dst_xs->Mask = 4; - memcpy( &dst_xs->YmmContext, &src_xs->YmmContext, sizeof(dst_xs->YmmContext) ); - } + copy_xstate( dst_xs, src_xs, src_xs->Mask ); } else { diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index efbabe7970f..0aada563f06 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -140,7 +140,7 @@ __ASM_GLOBAL_FUNC( alloc_fs_sel, #define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) #define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) #define FPU_sig(context) ((XMM_SAVE_AREA32 *)((context)->uc_mcontext.fpregs)) -#define XState_sig(fpu) (((unsigned int *)fpu->Reserved4)[12] == FP_XSTATE_MAGIC1 ? (XSTATE *)(fpu + 1) : NULL) +#define XState_sig(fpu) (((unsigned int *)fpu->Reserved4)[12] == FP_XSTATE_MAGIC1 ? (XSAVE_AREA_HEADER *)(fpu + 1) : NULL)
#elif defined(__FreeBSD__) || defined (__FreeBSD_kernel__)
@@ -359,11 +359,10 @@ struct exc_stack_layout ULONG64 align; /* 588 */ struct machine_frame machine_frame; /* 590 */ ULONG64 align2; /* 5b8 */ - XSTATE xstate; /* 5c0 */ }; C_ASSERT( offsetof(struct exc_stack_layout, rec) == 0x4f0 ); C_ASSERT( offsetof(struct exc_stack_layout, machine_frame) == 0x590 ); -C_ASSERT( sizeof(struct exc_stack_layout) == 0x700 ); +C_ASSERT( sizeof(struct exc_stack_layout) == 0x5c0 );
/* stack layout when calling KiUserApcDispatcher */ struct apc_stack_layout @@ -479,12 +478,12 @@ struct xcontext ULONG64 host_compaction_mask; };
-static inline XSTATE *xstate_from_context( const CONTEXT *context ) +static inline XSAVE_AREA_HEADER *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1);
if ((context->ContextFlags & CONTEXT_XSTATE) != CONTEXT_XSTATE) return NULL; - return (XSTATE *)((char *)xctx + xctx->XState.Offset); + return (XSAVE_AREA_HEADER *)((char *)xctx + xctx->XState.Offset); }
static inline void context_init_xstate( CONTEXT *context, void *xstate_buffer ) @@ -497,7 +496,7 @@ static inline void context_init_xstate( CONTEXT *context, void *xstate_buffer )
if (xstate_buffer) { - xctx->XState.Length = sizeof(XSTATE); + xctx->XState.Length = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; xctx->XState.Offset = (BYTE *)xstate_buffer - (BYTE *)xctx; context->ContextFlags |= CONTEXT_XSTATE;
@@ -893,7 +892,7 @@ static void save_context( struct xcontext *xcontext, const ucontext_t *sigcontex context->Dr7 = amd64_thread_data()->dr7; if (FPU_sig(sigcontext)) { - XSTATE *xs; + XSAVE_AREA_HEADER *xs;
context->ContextFlags |= CONTEXT_FLOATING_POINT; context->FltSave = *FPU_sig(sigcontext); @@ -918,7 +917,7 @@ static void save_context( struct xcontext *xcontext, const ucontext_t *sigcontex static void restore_context( const struct xcontext *xcontext, ucontext_t *sigcontext ) { const CONTEXT *context = &xcontext->c; - XSTATE *xs; + XSAVE_AREA_HEADER *xs;
amd64_thread_data()->dr0 = context->Dr0; amd64_thread_data()->dr1 = context->Dr1; @@ -1391,7 +1390,8 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec struct exc_stack_layout *stack; size_t stack_size; NTSTATUS status; - XSTATE *src_xs; + XSAVE_AREA_HEADER *src_xs; + unsigned int xstate_size;
if (rec->ExceptionCode == EXCEPTION_SINGLE_STEP) { @@ -1420,7 +1420,8 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec /* fix up instruction pointer in context for EXCEPTION_BREAKPOINT */ if (rec->ExceptionCode == EXCEPTION_BREAKPOINT) context->Rip--;
- stack_size = (ULONG_PTR)stack_ptr - (((ULONG_PTR)stack_ptr - sizeof(*stack)) & ~(ULONG_PTR)63); + xstate_size = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; + stack_size = (ULONG_PTR)stack_ptr - (((ULONG_PTR)stack_ptr - sizeof(*stack) - xstate_size) & ~(ULONG_PTR)63); stack = virtual_setup_exception( stack_ptr, stack_size, rec ); stack->rec = *rec; stack->context = *context; @@ -1429,15 +1430,12 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec
if ((src_xs = xstate_from_context( context ))) { - assert( !((ULONG_PTR)&stack->xstate & 63) ); - context_init_xstate( &stack->context, &stack->xstate ); - memset( &stack->xstate, 0, offsetof(XSTATE, YmmContext) ); - stack->xstate.CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; - if (src_xs->Mask & 4) - { - stack->xstate.Mask = 4; - memcpy( &stack->xstate.YmmContext, &src_xs->YmmContext, sizeof(stack->xstate.YmmContext) ); - } + XSAVE_AREA_HEADER *dst_xs = (XSAVE_AREA_HEADER *)(stack + 1); + assert( !((ULONG_PTR)dst_xs & 63) ); + context_init_xstate( &stack->context, dst_xs ); + memset( dst_xs, 0, sizeof(*dst_xs) ); + dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; + copy_xstate( dst_xs, src_xs, src_xs->Mask ); } else { @@ -1524,16 +1522,19 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context struct syscall_frame *frame = amd64_thread_data()->syscall_frame; struct exc_stack_layout *stack; NTSTATUS status = NtSetContextThread( GetCurrentThread(), context ); + unsigned int xstate_size;
if (status) return status; - stack = (struct exc_stack_layout *)((context->Rsp - sizeof(*stack)) & ~(ULONG_PTR)63); + xstate_size = sizeof(XSAVE_AREA_HEADER) + xstate_features_size; + stack = (struct exc_stack_layout *)((context->Rsp - sizeof(*stack) - xstate_size) & ~(ULONG_PTR)63); memmove( &stack->context, context, sizeof(*context) );
if ((context->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE) { - assert( !((ULONG_PTR)&stack->xstate & 63) ); - context_init_xstate( &stack->context, &stack->xstate ); - memcpy( &stack->xstate, &frame->xstate_hdr, sizeof(XSAVE_AREA_HEADER) + xstate_features_size ); + XSAVE_AREA_HEADER *dst_xs = (XSAVE_AREA_HEADER *)(stack + 1); + assert( !((ULONG_PTR)dst_xs & 63) ); + context_init_xstate( &stack->context, dst_xs ); + memcpy( dst_xs, &frame->xstate_hdr, sizeof(XSAVE_AREA_HEADER) + xstate_features_size ); } else context_init_xstate( &stack->context, NULL );
The support for avx512 state in contexts (specifically delivering avx512 state in hardware exception context and restoring that in NtContinue) is required for The Finals on avx512-enabled machines.
Alexandre Julliard (@julliard) commented about dlls/ntdll/unix/signal_x86_64.c:
__ASM_CFI(".cfi_rel_offset %r13,-0x18\n\t") __ASM_CFI(".cfi_rel_offset %r14,-0x20\n\t") __ASM_CFI(".cfi_rel_offset %r15,-0x28\n\t")
"movq 0x400(%r10),%rax\n\t" /* exception list */
"movq " __ASM_NAME("xstate_features_size") "(%rip),%rax\n\t"
Accessing global variables that way is not portable.
Alexandre Julliard (@julliard) commented about dlls/ntdll/unix/signal_i386.c:
/* Leave space for the whole set of YMM registers. They're not used in * 32-bit mode, but some processors fault if they're not in writable memory. */
- DECLSPEC_ALIGN(64) XSTATE xstate; /* 240 */
- DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate_hdr; /* 240 */
It doesn't seem necessary to change the name. That would help reduce the size of the diff.
Alexandre Julliard (@julliard) commented about dlls/ntdll/unix/system.c:
#if defined(__i386__) || defined(__x86_64__)
BOOL xstate_compaction_enabled = FALSE; +UINT64 xstate_supported_features_mask; +UINT64 xstate_features_size;
+unsigned int xstate_get_size( XSAVE_AREA_HEADER *xsave, UINT64 mask ) +{
- if (!(mask & ((UINT64)1 << XSTATE_AVX))) return sizeof(XSAVE_AREA_HEADER);
- return sizeof(XSAVE_AREA_HEADER) + sizeof(YMMCONTEXT);
+}
This helper doesn't make much sense as is, you may need to show more of part 2 to justify this.
On Wed Feb 7 10:34:52 2024 +0000, Alexandre Julliard wrote:
This helper doesn't make much sense as is, you may need to show more of part 2 to justify this.
Here is my current full branch: https://gitlab.winehq.org/gofman/wine/-/commits/xstate_avx512/?ref_type=head...
In the (current) final state the function minds generic feature mask to get the size which corresponds to it, and also xsave->CompactionMask: ``` unsigned int xstate_get_size( XSAVE_AREA_HEADER *xsave, UINT64 mask ) { UINT64 compaction_mask; unsigned int i; int off;
if (xsave) compaction_mask = xsave->CompactionMask; else if (xstate_compaction_enabled) compaction_mask = ((UINT64)1 << 63) | mask; else compaction_mask = 0;
mask >>= 2; off = sizeof(XSAVE_AREA_HEADER); i = 2; while (mask) { if (mask == 1) return off + xstate_feature_size[i]; off = next_xstate_offset( off, compaction_mask, i ); mask >>= 1; ++i; } return off; } ```
While looking at xsave->CompactionMask is not needed when dealing with Win context (which mask should always match system compaction to be valid), our "internal" xstate from signals always has CompactionMask of zero (Linux always copies xstate in uncompacted format for user signals). So I wanted xstate_get_size` / copy_xstate to deal with it transparently. Do you think I can leave the handle as is in attempt to reduce the amount of diff in the later patches, or should I introduce that only when really needed? Also, maybe if you have some general omments on the generic direction of those patches...
which mask should always match system compaction to be valid
- eh, this is not correct actually, the presence of compaction should match, but compaction mask in user context may include less features than fully supported in the system.