Fixes a regression introduced by 308a5e7c4d01525184b057058949c68c0ee77dba.
This was previously broken, but happened to work until the aforementioned commit put the YMM state right at the end of a page.
Signed-off-by: Zebediah Figura <z.figura12@gmail.com> --- I don't know how accurate "some" (as in "some processors fault" in the comment added below) is; both of my development machines suffer from this. It manifests as a stack overflow when trying to execute any 32-bit program. The processors in question are AMD cores manufactured in 2012 and 2018.
dlls/ntdll/unix/signal_i386.c | 53 +++++++++++------------------------ 1 file changed, 16 insertions(+), 37 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 0b2b269505f..446fed25eb8 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -461,24 +461,6 @@ enum i386_trap_code #endif };
-struct syscall_xsave -{ - union - { - XSAVE_FORMAT xsave; - FLOATING_SAVE_AREA fsave; - } u; - struct - { - ULONG64 mask; - ULONG64 compaction_mask; - ULONG64 reserved[6]; - M128A ymm_high[8]; - } xstate; -}; - -C_ASSERT( sizeof(struct syscall_xsave) == 0x2c0 ); - struct syscall_frame { DWORD restore_flags; /* 000 */ @@ -504,16 +486,13 @@ struct syscall_frame XSAVE_FORMAT xsave; FLOATING_SAVE_AREA fsave; } u; - struct /* 240 */ - { - ULONG64 mask; - ULONG64 compaction_mask; - ULONG64 reserved[6]; - M128A ymm_high[8]; - } xstate; + /* Leave space for the whole set of YMM registers. They're not used in + * 32-bit mode, but some processors fault if they're not in writable memory. + */ + XSTATE xstate; /* 240 */ };
-C_ASSERT( sizeof(struct syscall_frame) == 0x300 ); +C_ASSERT( sizeof(struct syscall_frame) == 0x380 );
struct x86_thread_data { @@ -1011,7 +990,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) memcpy( &frame->u.xsave, context->ExtendedRegisters, sizeof(frame->u.xsave) ); /* reset the current interrupt status */ frame->u.xsave.StatusWord &= frame->u.xsave.ControlWord | 0xff80; - frame->xstate.mask |= XSTATE_MASK_LEGACY; + frame->xstate.Mask |= XSTATE_MASK_LEGACY; } else if (flags & CONTEXT_FLOATING_POINT) { @@ -1023,7 +1002,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) { frame->u.fsave = context->FloatSave; } - frame->xstate.mask |= XSTATE_MASK_LEGACY_FLOATING_POINT; + frame->xstate.Mask |= XSTATE_MASK_LEGACY_FLOATING_POINT; } if (flags & CONTEXT_XSTATE) { @@ -1032,10 +1011,10 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context )
if (xs->Mask & XSTATE_MASK_GSSE) { - frame->xstate.mask |= XSTATE_MASK_GSSE; - memcpy( &frame->xstate.ymm_high, &xs->YmmContext, sizeof(frame->xstate.ymm_high) ); + frame->xstate.Mask |= XSTATE_MASK_GSSE; + frame->xstate.YmmContext = xs->YmmContext; } - else frame->xstate.mask &= ~XSTATE_MASK_GSSE; + else frame->xstate.Mask &= ~XSTATE_MASK_GSSE; }
frame->restore_flags |= flags & ~CONTEXT_INTEGER; @@ -1104,7 +1083,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->FloatSave = frame->u.fsave; } else if (!xstate_compaction_enabled || - (frame->xstate.mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { fpux_to_fpu( &context->FloatSave, &frame->u.xsave ); } @@ -1120,7 +1099,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) XSAVE_FORMAT *xs = (XSAVE_FORMAT *)context->ExtendedRegisters;
if (!xstate_compaction_enabled || - (frame->xstate.mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( xs, &frame->u.xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( xs->FloatRegisters, frame->u.xsave.FloatRegisters, @@ -1133,7 +1112,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) xs->ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.mask & XSTATE_MASK_LEGACY_SSE)) + if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( xs->XmmRegisters, frame->u.xsave.XmmRegisters, sizeof( xs->XmmRegisters )); xs->MxCsr = frame->u.xsave.MxCsr; @@ -1168,7 +1147,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) return STATUS_INVALID_PARAMETER;
mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; - xstate->Mask = frame->xstate.mask & mask; + xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); if (xstate->Mask) @@ -1176,7 +1155,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if (context_ex->XState.Length < sizeof(XSTATE)) return STATUS_BUFFER_OVERFLOW;
- memcpy( &xstate->YmmContext, frame->xstate.ymm_high, sizeof(frame->xstate.ymm_high) ); + xstate->YmmContext = frame->xstate.YmmContext; } } } @@ -2401,7 +2380,7 @@ __ASM_GLOBAL_FUNC( signal_start_thread, /* set syscall frame */ "cmpl $0,%fs:0x1f8\n\t" /* x86_thread_data()->syscall_frame */ "jnz 1f\n\t" - "leal -0x300(%esp),%eax\n\t" /* sizeof(struct syscall_frame) */ + "leal -0x380(%esp),%eax\n\t" /* sizeof(struct syscall_frame) */ "andl $~63,%eax\n\t" "movl %eax,%fs:0x1f8\n" /* x86_thread_data()->syscall_frame */ /* switch to thread stack */