From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 62 +++++++++++----------- dlls/ntdll/unix/signal_x86_64.c | 92 ++++++++++++++++----------------- dlls/ntdll/unix/system.c | 19 +++++++ dlls/ntdll/unix/unix_private.h | 3 ++ include/winnt.h | 8 +++ 5 files changed, 106 insertions(+), 78 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index d634e46a80f..afcac44fa50 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -508,10 +508,10 @@ struct syscall_frame /* Leave space for the whole set of YMM registers. They're not used in * 32-bit mode, but some processors fault if they're not in writable memory. */ - DECLSPEC_ALIGN(64) XSTATE xstate; /* 240 */ + DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate_hdr; /* 240 */ };
-C_ASSERT( sizeof(struct syscall_frame) == 0x380 ); +C_ASSERT( sizeof(struct syscall_frame) == 0x280 );
struct x86_thread_data { @@ -526,12 +526,14 @@ struct x86_thread_data SYSTEM_SERVICE_TABLE *syscall_table; /* 1f4 syscall table */ struct syscall_frame *syscall_frame; /* 1f8 frame pointer on syscall entry */ UINT64 xstate_features_mask; /* 1fc */ + UINT xstate_features_size; /* 204 */ };
C_ASSERT( sizeof(struct x86_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, gs ) == 0x1d8 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, syscall_table ) == 0x1f4 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, syscall_frame ) == 0x1f8 ); +C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, xstate_features_size ) == 0x204 );
/* flags to control the behavior of the syscall dispatcher */ #define SYSCALL_HAVE_XSAVE 1 @@ -938,12 +940,12 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < xstate_get_size( xs, xs->Mask ))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1002,7 +1004,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) memcpy( &frame->u.xsave, context->ExtendedRegisters, sizeof(frame->u.xsave) ); /* reset the current interrupt status */ frame->u.xsave.StatusWord &= frame->u.xsave.ControlWord | 0xff80; - frame->xstate.Mask |= XSTATE_MASK_LEGACY; + frame->xstate_hdr.Mask |= XSTATE_MASK_LEGACY; } else if (flags & CONTEXT_FLOATING_POINT) { @@ -1014,19 +1016,14 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) { frame->u.fsave = context->FloatSave; } - frame->xstate.Mask |= XSTATE_MASK_LEGACY_FLOATING_POINT; + frame->xstate_hdr.Mask |= XSTATE_MASK_LEGACY_FLOATING_POINT; } if (flags & CONTEXT_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - frame->xstate.YmmContext = xs->YmmContext; - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate_hdr, xs, xs->Mask ); }
frame->restore_flags |= flags & ~CONTEXT_INTEGER; @@ -1093,7 +1090,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->FloatSave = frame->u.fsave; } else if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { fpux_to_fpu( &context->FloatSave, &frame->u.xsave ); } @@ -1109,7 +1106,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) XSAVE_FORMAT *xs = (XSAVE_FORMAT *)context->ExtendedRegisters;
if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( xs, &frame->u.xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( xs->FloatRegisters, frame->u.xsave.FloatRegisters, @@ -1122,7 +1119,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) xs->ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) + if (!xstate_compaction_enabled || (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( xs->XmmRegisters, frame->u.xsave.XmmRegisters, sizeof( xs->XmmRegisters )); xs->MxCsr = frame->u.xsave.MxCsr; @@ -1140,21 +1137,21 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) - || context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); - xstate->Mask = frame->xstate.Mask & mask; + xstate->Mask = frame->xstate_hdr.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - xstate->YmmContext = frame->xstate.YmmContext; + if (context_ex->XState.Length < xstate_get_size( xstate, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate_hdr, xstate->Mask ); } } /* update the cached version of the debug registers */ @@ -1483,7 +1480,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr, XSTATE *dst_xs = (XSTATE *)(((ULONG_PTR)stack->xstate + 63) & ~63);
context_init_xstate( &stack->context, dst_xs ); - memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); + memset( dst_xs, 0, sizeof(XSAVE_AREA_HEADER) ); dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { @@ -1623,7 +1620,8 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, __ASM_CFI(".cfi_rel_offset %edi,-12\n\t") "movl 0x18(%ebp),%edx\n\t" /* teb */ "pushl 0(%edx)\n\t" /* teb->Tib.ExceptionList */ - "subl $0x380,%esp\n\t" /* sizeof(struct syscall_frame) */ + "subl $0x280,%esp\n\t" /* sizeof(struct syscall_frame) */ + "subl %fs:0x204,%esp\n\t" /* x86_thread_data()->xstate_features_size */ "andl $~63,%esp\n\t" "leal 8(%ebp),%eax\n\t" "movl %eax,0x38(%esp)\n\t" /* frame->syscall_cfa */ @@ -2431,7 +2429,7 @@ void signal_init_process(void) struct sigaction sig_act; void *kernel_stack = (char *)ntdll_get_thread_data()->kernel_stack + kernel_stack_size;
- x86_thread_data()->syscall_frame = (struct syscall_frame *)kernel_stack - 1; + x86_thread_data()->syscall_frame = (struct syscall_frame *)((ULONG_PTR)((char *)kernel_stack - sizeof(struct syscall_frame) - xstate_features_size) & ~(ULONG_PTR)63);
if (cpu_info.ProcessorFeatureBits & CPU_FEATURE_FXSR) syscall_flags |= SYSCALL_HAVE_FXSAVE; if (cpu_info.ProcessorFeatureBits & CPU_FEATURE_XSAVE) syscall_flags |= SYSCALL_HAVE_XSAVE; @@ -2481,6 +2479,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB thread_data->gs = get_gs(); thread_data->syscall_table = KeServiceDescriptorTable; thread_data->xstate_features_mask = xstate_supported_features_mask; + thread_data->xstate_features_size = xstate_features_size;
context.SegCs = get_cs(); context.SegDs = get_ds(); @@ -2505,7 +2504,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB ctx->ContextFlags = CONTEXT_FULL | CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memset( frame, 0, sizeof(*frame) ); if (xstate_compaction_enabled) - frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; + frame->xstate_hdr.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx );
stack = (DWORD *)ctx; @@ -2547,7 +2546,8 @@ __ASM_GLOBAL_FUNC( signal_start_thread, "movl 0x1f8(%ecx),%eax\n\t" /* x86_thread_data()->syscall_frame */ "orl %eax,%eax\n\t" "jnz 1f\n\t" - "leal -0x380(%esp),%eax\n\t" /* sizeof(struct syscall_frame) */ + "leal -0x280(%esp),%eax\n\t" /* sizeof(struct syscall_frame) */ + "subl %fs:0x204,%eax\n\t" /* x86_thread_data()->xstate_features_size */ "andl $~63,%eax\n\t" "movl %eax,0x1f8(%ecx)\n" /* x86_thread_data()->syscall_frame */ /* switch to kernel stack */ diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 7d6edffbc36..efbabe7970f 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -424,10 +424,12 @@ struct syscall_frame DWORD restore_flags; /* 00b4 */ DWORD align[2]; /* 00b8 */ XMM_SAVE_AREA32 xsave; /* 00c0 */ - DECLSPEC_ALIGN(64) XSTATE xstate; /* 02c0 */ + DECLSPEC_ALIGN(64) XSAVE_AREA_HEADER xstate_hdr; /* 02c0 */ };
-C_ASSERT( sizeof( struct syscall_frame ) == 0x400); +C_ASSERT( offsetof( struct syscall_frame, xsave ) == 0xc0 ); +C_ASSERT( offsetof( struct syscall_frame, xstate_hdr ) == 0x2c0 ); +C_ASSERT( sizeof( struct syscall_frame ) == 0x300);
struct amd64_thread_data { @@ -978,12 +980,12 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < xstate_get_size( xs, xs->Mask ))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1043,19 +1045,14 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (flags & CONTEXT_FLOATING_POINT) { frame->xsave = context->FltSave; - frame->xstate.Mask |= XSTATE_MASK_LEGACY; + frame->xstate_hdr.Mask |= XSTATE_MASK_LEGACY; } if (flags & CONTEXT_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - memcpy( &frame->xstate.YmmContext, &xs->YmmContext, sizeof(xs->YmmContext) ); - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate_hdr, xs, xs->Mask ); }
frame->restore_flags |= flags & ~CONTEXT_INTEGER; @@ -1121,7 +1118,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if (needed_flags & CONTEXT_FLOATING_POINT) { if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( &context->FltSave, &frame->xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( context->FltSave.FloatRegisters, frame->xsave.FloatRegisters, @@ -1135,7 +1132,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->FltSave.ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) + if (!xstate_compaction_enabled || (frame->xstate_hdr.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( context->FltSave.XmmRegisters, frame->xsave.XmmRegisters, sizeof( context->FltSave.XmmRegisters )); @@ -1156,21 +1153,21 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) - || context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); - xstate->Mask = frame->xstate.Mask & mask; + xstate->Mask = frame->xstate_hdr.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - memcpy( &xstate->YmmContext, &frame->xstate.YmmContext, sizeof(xstate->YmmContext) ); + if (context_ex->XState.Length < xstate_get_size( xstate, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate_hdr, xstate->Mask ); } } /* update the cached version of the debug registers */ @@ -1277,14 +1274,9 @@ NTSTATUS set_thread_wow64_context( HANDLE handle, const void *ctx, ULONG size ) if (flags & CONTEXT_I386_XSTATE) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); + XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
- if (xs->Mask & XSTATE_MASK_GSSE) - { - frame->xstate.Mask |= XSTATE_MASK_GSSE; - memcpy( &frame->xstate.YmmContext, &xs->YmmContext, sizeof(xs->YmmContext) ); - } - else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; + copy_xstate( &frame->xstate_hdr, xs, xs->Mask ); frame->restore_flags |= CONTEXT_XSTATE; } return STATUS_SUCCESS; @@ -1365,24 +1357,24 @@ NTSTATUS get_thread_wow64_context( HANDLE handle, void *ctx, ULONG size ) fpux_to_fpu( &context->FloatSave, &frame->xsave ); context->ContextFlags |= CONTEXT_I386_FLOATING_POINT; } - if ((needed_flags & CONTEXT_I386_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_I386_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); - XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); - unsigned int mask; + XSAVE_AREA_HEADER *xstate = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask;
- if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || - context_ex->XState.Length > sizeof(XSTATE)) + if (context_ex->XState.Length < sizeof(XSAVE_AREA_HEADER) || + context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); - xstate->Mask = frame->xstate.Mask & mask; + xstate->Mask = frame->xstate_hdr.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; - memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); + memset( xstate->Reserved2, 0, sizeof(xstate->Reserved2) ); if (xstate->Mask) { - if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW; - memcpy( &xstate->YmmContext, &frame->xstate.YmmContext, sizeof(xstate->YmmContext) ); + if (context_ex->XState.Length < xstate_get_size( xstate, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; + copy_xstate( xstate, &frame->xstate_hdr, xstate->Mask ); } } return STATUS_SUCCESS; @@ -1541,7 +1533,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context { assert( !((ULONG_PTR)&stack->xstate & 63) ); context_init_xstate( &stack->context, &stack->xstate ); - memcpy( &stack->xstate, &frame->xstate, sizeof(frame->xstate) ); + memcpy( &stack->xstate, &frame->xstate_hdr, sizeof(XSAVE_AREA_HEADER) + xstate_features_size ); } else context_init_xstate( &stack->context, NULL );
@@ -1583,13 +1575,15 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "fnstcw -0x2c(%rbp)\n\t" "movq %rsi,-0x38(%rbp)\n\t" /* ret_ptr */ "movq %rdx,-0x40(%rbp)\n\t" /* ret_len */ - "subq $0x408,%rsp\n\t" /* sizeof(struct syscall_frame) + exception */ + "subq $0x308,%rsp\n\t" /* sizeof(struct syscall_frame) + exception */ + "movq " __ASM_NAME("xstate_features_size") "(%rip),%rsi\n\t" + "subq %rsi,%rsp\n\t" "andq $~63,%rsp\n\t" "leaq 0x10(%rbp),%rax\n\t" "movq %rax,0xa8(%rsp)\n\t" /* frame->syscall_cfa */ "movq 0x328(%r8),%r10\n\t" /* amd64_thread_data()->syscall_frame */ "movq (%r8),%rax\n\t" /* NtCurrentTeb()->Tib.ExceptionList */ - "movq %rax,0x400(%rsp)\n\t" + "movq %rax,0x300(%rsp,%rsi)\n\t" "movl 0xb0(%r10),%r14d\n\t" /* prev_frame->syscall_flags */ "movl %r14d,0xb0(%rsp)\n\t" /* frame->syscall_flags */ "movq %r10,0xa0(%rsp)\n\t" /* frame->prev_frame */ @@ -1622,7 +1616,8 @@ __ASM_GLOBAL_FUNC( user_mode_callback_return, __ASM_CFI(".cfi_rel_offset %r13,-0x18\n\t") __ASM_CFI(".cfi_rel_offset %r14,-0x20\n\t") __ASM_CFI(".cfi_rel_offset %r15,-0x28\n\t") - "movq 0x400(%r10),%rax\n\t" /* exception list */ + "movq " __ASM_NAME("xstate_features_size") "(%rip),%rax\n\t" + "movq 0x300(%r10,%rax),%rax\n\t" /* exception list */ "movq %rax,0(%rcx)\n\t" /* teb->Tib.ExceptionList */ "movq -0x38(%rbp),%r10\n\t" /* ret_ptr */ "movq -0x40(%rbp),%r11\n\t" /* ret_len */ @@ -2386,7 +2381,9 @@ void signal_init_process(void) WOW_TEB *wow_teb = get_wow_teb( NtCurrentTeb() ); void *ptr, *kernel_stack = (char *)ntdll_get_thread_data()->kernel_stack + kernel_stack_size;
- amd64_thread_data()->syscall_frame = (struct syscall_frame *)kernel_stack - 1; + amd64_thread_data()->syscall_frame = (struct syscall_frame *)((ULONG_PTR)((char *)kernel_stack - sizeof(struct syscall_frame) - xstate_features_size) & ~(ULONG_PTR)63); + + TRACE("xstate_features_size %d, kernel_stack %p, &xstate_hdr %p.\n", (int)xstate_features_size, kernel_stack, &amd64_thread_data()->syscall_frame->xstate_hdr);
/* sneak in a syscall dispatcher pointer at a fixed address (7ffe1000) */ ptr = (char *)user_shared_data + page_size; @@ -2538,7 +2535,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB ctx->ContextFlags = CONTEXT_FULL; memset( frame, 0, sizeof(*frame) ); if (xstate_compaction_enabled) - frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; + frame->xstate_hdr.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx );
frame->cs = cs64_sel; @@ -2581,7 +2578,8 @@ __ASM_GLOBAL_FUNC( signal_start_thread, "movq 0x328(%rcx),%r8\n\t" /* amd64_thread_data()->syscall_frame */ "orq %r8,%r8\n\t" "jnz 1f\n\t" - "leaq -0x400(%rsp),%r8\n\t" /* sizeof(struct syscall_frame) */ + "leaq -0x300(%rsp),%r8\n\t" /* sizeof(struct syscall_frame) */ + "subq " __ASM_NAME("xstate_features_size") "(%rip),%r8\n\t" "andq $~63,%r8\n\t" "movq %r8,0x328(%rcx)\n" /* amd64_thread_data()->syscall_frame */ /* switch to kernel stack */ diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index c3ffdc6921e..6bed29104d6 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -248,6 +248,23 @@ static pthread_mutex_t timezone_mutex = PTHREAD_MUTEX_INITIALIZER;
BOOL xstate_compaction_enabled = FALSE; UINT64 xstate_supported_features_mask; +UINT64 xstate_features_size; + +unsigned int xstate_get_size( XSAVE_AREA_HEADER *xsave, UINT64 mask ) +{ + if (!(mask & ((UINT64)1 << XSTATE_AVX))) return sizeof(XSAVE_AREA_HEADER); + return sizeof(XSAVE_AREA_HEADER) + sizeof(YMMCONTEXT); +} + +void copy_xstate( XSAVE_AREA_HEADER *dst, XSAVE_AREA_HEADER *src, UINT64 mask ) +{ + mask &= xstate_extended_features() & src->Mask; + if (src->CompactionMask) mask &= src->CompactionMask; + if (dst->CompactionMask) mask &= dst->CompactionMask; + dst->Mask = (dst->Mask & ~xstate_extended_features()) | mask; + if (mask & ((UINT64)1 << XSTATE_AVX)) + *(YMMCONTEXT *)(dst + 1) = *(YMMCONTEXT *)(src + 1); +}
#define AUTH 0x68747541 /* "Auth" */ #define ENTI 0x69746e65 /* "enti" */ @@ -400,6 +417,8 @@ static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) xstate_supported_features_mask = 3; if (features & CPU_FEATURE_AVX) xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX; + xstate_features_size = xstate_get_size( NULL, xstate_supported_features_mask ) - sizeof(XSAVE_AREA_HEADER); + xstate_features_size = (xstate_features_size + 15) & ~15; }
if (regs[1] == AUTH && regs[3] == ENTI && regs[2] == CAMD) diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index 75c81b21f84..5e7509d6d49 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -215,6 +215,9 @@ extern void fpu_to_fpux( XSAVE_FORMAT *fpux, const I386_FLOATING_SAVE_AREA *fpu
extern BOOL xstate_compaction_enabled; extern UINT64 xstate_supported_features_mask; +extern UINT64 xstate_features_size; +extern unsigned int xstate_get_size( XSAVE_AREA_HEADER *xsave, UINT64 mask ); +extern void copy_xstate( XSAVE_AREA_HEADER *dst, XSAVE_AREA_HEADER *src, UINT64 mask );
static inline UINT64 xstate_extended_features(void) { diff --git a/include/winnt.h b/include/winnt.h index e1de2f29ecd..3674c777e97 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -1470,6 +1470,14 @@ typedef struct _XSTATE_CONFIGURATION ULONG64 EnabledUserVisibleSupervisorFeatures; } XSTATE_CONFIGURATION, *PXSTATE_CONFIGURATION;
+typedef struct _XSAVE_AREA_HEADER +{ + DWORD64 Mask; + DWORD64 CompactionMask; + DWORD64 Reserved2[6]; +} +XSAVE_AREA_HEADER, *PXSAVE_AREA_HEADER; + typedef struct _YMMCONTEXT { M128A Ymm0;