From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/unix/signal_i386.c | 6 +++++- dlls/ntdll/unix/signal_x86_64.c | 9 +++++++-- 2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 51802a57037..c4d553ba8b8 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -1005,6 +1005,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
+ if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); copy_xstate( &frame->xstate, xs, xs->Mask ); }
@@ -1126,6 +1127,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
+ if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; @@ -2127,6 +2129,7 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) NtGetContextThread( GetCurrentThread(), &context->c ); if (xstate_extended_features()) { + if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); context_init_xstate( &context->c, &frame->xstate ); saved_compaction = frame->xstate.CompactionMask; } @@ -2617,7 +2620,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ "jz 2f\n\t" "movl %fs:0x1fc,%eax\n\t" /* x86_thread_data()->xstate_features_mask */ - "movl %fs:0x200,%edx\n\t" /* x86_thread_data()->xstate_features_mask high dword */ + "xorl %edx,%edx\n\t" + "andl $7,%eax\n\t" "xorl %edi,%edi\n\t" "movl %edi,0x240(%ecx)\n\t" "movl %edi,0x244(%ecx)\n\t" diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index d071120af85..8fdcc37f078 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -1051,6 +1051,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
+ if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); copy_xstate( &frame->xstate, xs, xs->Mask ); }
@@ -1159,6 +1160,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
+ if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; @@ -1276,6 +1278,7 @@ NTSTATUS set_thread_wow64_context( HANDLE handle, const void *ctx, ULONG size ) CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset);
+ if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); copy_xstate( &frame->xstate, xs, xs->Mask ); frame->restore_flags |= CONTEXT_XSTATE; } @@ -1367,6 +1370,7 @@ NTSTATUS get_thread_wow64_context( HANDLE handle, void *ctx, ULONG size ) context_ex->XState.Length > sizeof(XSAVE_AREA_HEADER) + xstate_features_size) return STATUS_INVALID_PARAMETER;
+ if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; @@ -2162,6 +2166,7 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) NtGetContextThread( GetCurrentThread(), &context->c ); if (xstate_extended_features()) { + if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); context_init_xstate( &context->c, &frame->xstate ); saved_compaction = frame->xstate.CompactionMask; } @@ -2664,11 +2669,11 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, #ifdef __APPLE__ "movq %gs:0x30,%rdx\n\t" "movl 0x340(%rdx),%eax\n\t" - "movl 0x344(%rdx),%edx\n\t" #else "movl %gs:0x340,%eax\n\t" /* amd64_thread_data()->xstate_features_mask */ - "movl %gs:0x344,%edx\n\t" /* amd64_thread_data()->xstate_features_mask high dword */ #endif + "xorl %edx,%edx\n\t" + "andl $7,%eax\n\t" "xorq %rbp,%rbp\n\t" "movq %rbp,0x2c0(%rcx)\n\t" "movq %rbp,0x2c8(%rcx)\n\t"
From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/unix/signal_i386.c | 2 ++ dlls/ntdll/unix/signal_x86_64.c | 2 ++ 2 files changed, 4 insertions(+)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index c4d553ba8b8..94799522202 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -1004,9 +1004,11 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask = frame->xstate.Mask;
if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); copy_xstate( &frame->xstate, xs, xs->Mask ); + if (xs->CompactionMask) frame->xstate.Mask |= mask & ~xs->CompactionMask; }
frame->restore_flags |= flags & ~CONTEXT_INTEGER; diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 8fdcc37f078..f4a6fcce8ca 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -1050,9 +1050,11 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSAVE_AREA_HEADER *xs = (XSAVE_AREA_HEADER *)((char *)context_ex + context_ex->XState.Offset); + UINT64 mask = frame->xstate.Mask;
if (xstate_compaction_enabled) frame->xstate.CompactionMask |= xstate_extended_features(); copy_xstate( &frame->xstate, xs, xs->Mask ); + if (xs->CompactionMask) frame->xstate.Mask |= mask & ~xs->CompactionMask; }
frame->restore_flags |= flags & ~CONTEXT_INTEGER;
From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/unix/signal_i386.c | 2 ++ dlls/ntdll/unix/signal_x86_64.c | 4 +++ dlls/ntdll/unix/system.c | 48 ++++++++++++++++++++++++++++++--- 3 files changed, 50 insertions(+), 4 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 94799522202..61f42806f5d 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -1139,6 +1139,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if (context_ex->XState.Length < xstate_get_size( xstate->CompactionMask, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; copy_xstate( xstate, &frame->xstate, xstate->Mask ); + /* copy_xstate may use avx in memcpy, restore xstate not to break the tests. */ + frame->restore_flags |= CONTEXT_XSTATE; } } /* update the cached version of the debug registers */ diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index f4a6fcce8ca..d8d16917942 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -1172,6 +1172,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) if (context_ex->XState.Length < xstate_get_size( xstate->CompactionMask, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; copy_xstate( xstate, &frame->xstate, xstate->Mask ); + /* copy_xstate may use avx in memcpy, restore xstate not to break the tests. */ + frame->restore_flags |= CONTEXT_XSTATE; } } /* update the cached version of the debug registers */ @@ -1382,6 +1384,8 @@ NTSTATUS get_thread_wow64_context( HANDLE handle, void *ctx, ULONG size ) if (context_ex->XState.Length < xstate_get_size( xstate->CompactionMask, xstate->Mask )) return STATUS_BUFFER_OVERFLOW; copy_xstate( xstate, &frame->xstate, xstate->Mask ); + /* copy_xstate may use avx in memcpy, restore xstate not to break the tests. */ + frame->restore_flags |= CONTEXT_XSTATE; } } return STATUS_SUCCESS; diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index 4c6c4cd23e2..068eb9d6839 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -250,20 +250,60 @@ BOOL xstate_compaction_enabled = FALSE; UINT64 xstate_supported_features_mask; UINT64 xstate_features_size;
+static int xstate_feature_offset[64] = {0, 0, 576}; +static int xstate_feature_size[64] = {0, 0, 256}; +static UINT64 xstate_aligned_features; + +static int next_xstate_offset( int off, UINT64 compaction_mask, int feature_idx ) +{ /* returns the offset to use for feature_idx + 1, given that feature_idx is located at off */ + const UINT64 feature_mask = (UINT64)1 << feature_idx; + + if (!compaction_mask) return xstate_feature_offset[feature_idx + 1] - sizeof(XSAVE_FORMAT); /* no compaction mask: off is ignored and the table offset of the next feature is used, less sizeof(XSAVE_FORMAT) */ + + if (compaction_mask & feature_mask) off += xstate_feature_size[feature_idx]; /* this feature takes space only when its bit is in compaction_mask */ + if (xstate_aligned_features & (feature_mask << 1)) + off = (off + 63) & ~63; /* round up to 64 bytes when the next feature is flagged in xstate_aligned_features */ + return off; +} + unsigned int xstate_get_size( UINT64 compaction_mask, UINT64 mask ) { - if (!(mask & ((UINT64)1 << XSTATE_AVX))) return sizeof(XSAVE_AREA_HEADER); - return sizeof(XSAVE_AREA_HEADER) + sizeof(YMMCONTEXT); + unsigned int i; + int off; + + mask >>= 2; /* bits 0 and 1 are skipped; the walk starts at feature index 2 */ + off = sizeof(XSAVE_AREA_HEADER); /* the returned size always includes the XSAVE_AREA_HEADER */ + i = 2; + while (mask) + { + if (mask == 1) return off + xstate_feature_size[i]; /* i is the highest feature set in mask: the size ends right after it */ + off = next_xstate_offset( off, compaction_mask, i ); + mask >>= 1; + ++i; + } + return off; }
void copy_xstate( XSAVE_AREA_HEADER *dst, XSAVE_AREA_HEADER *src, UINT64 mask ) { + unsigned int i; + int src_off, dst_off; + mask &= xstate_extended_features() & src->Mask; if (src->CompactionMask) mask &= src->CompactionMask; if (dst->CompactionMask) mask &= dst->CompactionMask; dst->Mask = (dst->Mask & ~xstate_extended_features()) | mask; - if (mask & ((UINT64)1 << XSTATE_AVX)) - *(YMMCONTEXT *)(dst + 1) = *(YMMCONTEXT *)(src + 1); + mask >>= 2; /* bits 0 and 1 are not copied here; the walk starts at feature index 2 */ + src_off = dst_off = sizeof(XSAVE_AREA_HEADER); /* offsets are in bytes from the start of each XSAVE_AREA_HEADER */ + i = 2; + while (1) + { + if (mask & 1) memcpy( (char *)dst + dst_off, (char *)src + src_off, xstate_feature_size[i] ); /* copy feature i from its place in src to its place in dst */ + if (!(mask >>= 1)) break; /* nothing left to copy: stop before advancing the offsets */ + src_off = next_xstate_offset( src_off, src->CompactionMask, i ); /* src and dst advance separately, each by its own CompactionMask */ + dst_off = next_xstate_offset( dst_off, dst->CompactionMask, i ); + ++i; + } }
#define AUTH 0x68747541 /* "Auth" */
From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/unix/thread.c | 82 +++++++++++++--------------------------- 1 file changed, 26 insertions(+), 56 deletions(-)
diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c index 60c833b0320..0d98473f01b 100644 --- a/dlls/ntdll/unix/thread.c +++ b/dlls/ntdll/unix/thread.c @@ -613,6 +613,28 @@ static NTSTATUS context_to_server( context_t *to, USHORT to_machine, const void }
+/*********************************************************************** + * xstate_from_server + * + * Copy xstate from the server format: YmmContext is filled and Mask bit 2 set only if some from->ymm.regs.ymm_high entry is non-zero. + */ +static void xstate_from_server( CONTEXT_EX *xctx, const context_t *from ) +{ + XSTATE *xs = (XSTATE *)((char *)xctx + xctx->XState.Offset); + unsigned int i; + + xs->Mask &= ~4; + if (xs->CompactionMask) xs->CompactionMask = 0x8000000000000004; + for (i = 0; i < ARRAY_SIZE( from->ymm.regs.ymm_high); i++) + { + if (!from->ymm.regs.ymm_high[i].low && !from->ymm.regs.ymm_high[i].high) continue; + memcpy( &xs->YmmContext, &from->ymm.regs, sizeof(xs->YmmContext) ); /* first non-zero entry found: copy the whole block once, then leave the loop */ + xs->Mask |= 4; + break; + } +} + + /*********************************************************************** * context_from_server * @@ -686,20 +708,7 @@ static NTSTATUS context_from_server( void *dst, const context_t *from, USHORT ma memcpy( to->ExtendedRegisters, from->ext.i386_regs, sizeof(to->ExtendedRegisters) ); } if ((from->flags & SERVER_CTX_YMM_REGISTERS) && (to_flags & CONTEXT_I386_XSTATE)) - { - CONTEXT_EX *xctx = (CONTEXT_EX *)(to + 1); - XSTATE *xs = (XSTATE *)((char *)xctx + xctx->XState.Offset); - - xs->Mask &= ~4; - if (xs->CompactionMask) xs->CompactionMask = 0x8000000000000004; - for (i = 0; i < ARRAY_SIZE( from->ymm.regs.ymm_high); i++) - { - if (!from->ymm.regs.ymm_high[i].low && !from->ymm.regs.ymm_high[i].high) continue; - memcpy( &xs->YmmContext, &from->ymm.regs, sizeof(xs->YmmContext) ); - xs->Mask |= 4; - break; - } - } + xstate_from_server( (CONTEXT_EX *)(to + 1), from ); return STATUS_SUCCESS; }
@@ -760,20 +769,7 @@ static NTSTATUS context_from_server( void *dst, const context_t *from, USHORT ma to->Dr7 = from->debug.x86_64_regs.dr7; } if ((from->flags & SERVER_CTX_YMM_REGISTERS) && (to_flags & CONTEXT_I386_XSTATE)) - { - CONTEXT_EX *xctx = (CONTEXT_EX *)(to + 1); - XSTATE *xs = (XSTATE *)((char *)xctx + xctx->XState.Offset); - - xs->Mask &= ~4; - if (xs->CompactionMask) xs->CompactionMask = 0x8000000000000004; - for (i = 0; i < ARRAY_SIZE( from->ymm.regs.ymm_high); i++) - { - if (!from->ymm.regs.ymm_high[i].low && !from->ymm.regs.ymm_high[i].high) continue; - memcpy( &xs->YmmContext, &from->ymm.regs, sizeof(xs->YmmContext) ); - xs->Mask |= 4; - break; - } - } + xstate_from_server( (CONTEXT_EX *)(to + 1), from ); return STATUS_SUCCESS; }
@@ -835,20 +831,7 @@ static NTSTATUS context_from_server( void *dst, const context_t *from, USHORT ma to->Dr7 = from->debug.x86_64_regs.dr7; } if ((from->flags & SERVER_CTX_YMM_REGISTERS) && (to_flags & CONTEXT_AMD64_XSTATE)) - { - CONTEXT_EX *xctx = (CONTEXT_EX *)(to + 1); - XSTATE *xs = (XSTATE *)((char *)xctx + xctx->XState.Offset); - - xs->Mask &= ~4; - if (xs->CompactionMask) xs->CompactionMask = 0x8000000000000004; - for (i = 0; i < ARRAY_SIZE( from->ymm.regs.ymm_high); i++) - { - if (!from->ymm.regs.ymm_high[i].low && !from->ymm.regs.ymm_high[i].high) continue; - memcpy( &xs->YmmContext, &from->ymm.regs, sizeof(xs->YmmContext) ); - xs->Mask |= 4; - break; - } - } + xstate_from_server( (CONTEXT_EX *)(to + 1), from ); return STATUS_SUCCESS; }
@@ -917,20 +900,7 @@ static NTSTATUS context_from_server( void *dst, const context_t *from, USHORT ma to->Dr7 = from->debug.i386_regs.dr7; } if ((from->flags & SERVER_CTX_YMM_REGISTERS) && (to_flags & CONTEXT_AMD64_XSTATE)) - { - CONTEXT_EX *xctx = (CONTEXT_EX *)(to + 1); - XSTATE *xs = (XSTATE *)((char *)xctx + xctx->XState.Offset); - - xs->Mask &= ~4; - if (xs->CompactionMask) xs->CompactionMask = 0x8000000000000004; - for (i = 0; i < ARRAY_SIZE( from->ymm.regs.ymm_high); i++) - { - if (!from->ymm.regs.ymm_high[i].low && !from->ymm.regs.ymm_high[i].high) continue; - memcpy( &xs->YmmContext, &from->ymm.regs, sizeof(xs->YmmContext) ); - xs->Mask |= 4; - break; - } - } + xstate_from_server( (CONTEXT_EX *)(to + 1), from ); return STATUS_SUCCESS; }
From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/unix/thread.c | 46 ++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 28 deletions(-)
diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c index 0d98473f01b..800f2cc1968 100644 --- a/dlls/ntdll/unix/thread.c +++ b/dlls/ntdll/unix/thread.c @@ -227,6 +227,20 @@ static unsigned int get_native_context_flags( USHORT native_machine, USHORT wow_ }
+/*********************************************************************** + * xstate_to_server + * + * Copy xstate to the server format (YmmContext is copied into to->ymm.regs.ymm_high only when Mask bit 2 is set). + */ +static void xstate_to_server( context_t *to, const CONTEXT_EX *xctx ) +{ + const XSTATE *xs = (const XSTATE *)((const char *)xctx + xctx->XState.Offset); + + to->flags |= SERVER_CTX_YMM_REGISTERS; + if (xs->Mask & 4) memcpy( &to->ymm.regs.ymm_high, &xs->YmmContext, sizeof(xs->YmmContext) ); +} + + /*********************************************************************** * context_to_server * @@ -303,13 +317,7 @@ static NTSTATUS context_to_server( context_t *to, USHORT to_machine, const void memcpy( to->ext.i386_regs, from->ExtendedRegisters, sizeof(to->ext.i386_regs) ); } if (flags & CONTEXT_I386_XSTATE) - { - const CONTEXT_EX *xctx = (const CONTEXT_EX *)(from + 1); - const XSTATE *xs = (const XSTATE *)((const char *)xctx + xctx->XState.Offset); - - to->flags |= SERVER_CTX_YMM_REGISTERS; - if (xs->Mask & 4) memcpy( &to->ymm.regs.ymm_high, &xs->YmmContext, sizeof(xs->YmmContext) ); - } + xstate_to_server( to, (const CONTEXT_EX *)(from + 1) ); return STATUS_SUCCESS; }
@@ -367,13 +375,7 @@ static NTSTATUS context_to_server( context_t *to, USHORT to_machine, const void fpu_to_fpux( (XMM_SAVE_AREA32 *)to->fp.x86_64_regs.fpregs, &from->FloatSave ); } if (flags & CONTEXT_I386_XSTATE) - { - const CONTEXT_EX *xctx = (const CONTEXT_EX *)(from + 1); - const XSTATE *xs = (const XSTATE *)((const char *)xctx + xctx->XState.Offset); - - to->flags |= SERVER_CTX_YMM_REGISTERS; - if (xs->Mask & 4) memcpy( &to->ymm.regs.ymm_high, &xs->YmmContext, sizeof(xs->YmmContext) ); - } + xstate_to_server( to, (const CONTEXT_EX *)(from + 1) ); return STATUS_SUCCESS; }
@@ -434,13 +436,7 @@ static NTSTATUS context_to_server( context_t *to, USHORT to_machine, const void to->debug.x86_64_regs.dr7 = from->Dr7; } if (flags & CONTEXT_AMD64_XSTATE) - { - const CONTEXT_EX *xctx = (const CONTEXT_EX *)(from + 1); - const XSTATE *xs = (const XSTATE *)((const char *)xctx + xctx->XState.Offset); - - to->flags |= SERVER_CTX_YMM_REGISTERS; - if (xs->Mask & 4) memcpy( &to->ymm.regs.ymm_high, &xs->YmmContext, sizeof(xs->YmmContext) ); - } + xstate_to_server( to, (const CONTEXT_EX *)(from + 1) ); return STATUS_SUCCESS; }
@@ -505,13 +501,7 @@ static NTSTATUS context_to_server( context_t *to, USHORT to_machine, const void to->debug.i386_regs.dr7 = from->Dr7; } if (flags & CONTEXT_AMD64_XSTATE) - { - const CONTEXT_EX *xctx = (const CONTEXT_EX *)(from + 1); - const XSTATE *xs = (const XSTATE *)((const char *)xctx + xctx->XState.Offset); - - to->flags |= SERVER_CTX_YMM_REGISTERS; - if (xs->Mask & 4) memcpy( &to->ymm.regs.ymm_high, &xs->YmmContext, sizeof(xs->YmmContext) ); - } + xstate_to_server( to, (const CONTEXT_EX *)(from + 1) ); return STATUS_SUCCESS; }
From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/unix/thread.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c index 800f2cc1968..bf17fd06579 100644 --- a/dlls/ntdll/unix/thread.c +++ b/dlls/ntdll/unix/thread.c @@ -236,6 +236,7 @@ static void xstate_to_server( context_t *to, const CONTEXT_EX *xctx ) { const XSTATE *xs = (const XSTATE *)((const char *)xctx + xctx->XState.Offset);
+ if (xs->CompactionMask && !(xs->CompactionMask & 4)) return; to->flags |= SERVER_CTX_YMM_REGISTERS; if (xs->Mask & 4) memcpy( &to->ymm.regs.ymm_high, &xs->YmmContext, sizeof(xs->YmmContext) ); } @@ -613,8 +614,14 @@ static void xstate_from_server( CONTEXT_EX *xctx, const context_t *from ) XSTATE *xs = (XSTATE *)((char *)xctx + xctx->XState.Offset); unsigned int i;
- xs->Mask &= ~4; - if (xs->CompactionMask) xs->CompactionMask = 0x8000000000000004; + xs->Mask &= 3; + + if (xs->CompactionMask) + { + xs->CompactionMask &= ~(UINT64)3; + if (!(xs->CompactionMask & 4)) return; + } + for (i = 0; i < ARRAY_SIZE( from->ymm.regs.ymm_high); i++) { if (!from->ymm.regs.ymm_high[i].low && !from->ymm.regs.ymm_high[i].high) continue;
From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/tests/exception.c | 2 +- dlls/ntdll/unix/system.c | 37 +++++++++++++++++++++---- programs/wineboot/wineboot.c | 53 ++++++++++++++++++++++++++---------- 3 files changed, 71 insertions(+), 21 deletions(-)
diff --git a/dlls/ntdll/tests/exception.c b/dlls/ntdll/tests/exception.c index d3c279ecab8..0736da2ee8f 100644 --- a/dlls/ntdll/tests/exception.c +++ b/dlls/ntdll/tests/exception.c @@ -9115,7 +9115,7 @@ static DWORD test_extended_context_handler(EXCEPTION_RECORD *rec, EXCEPTION_REGI } else { - ok(xs->Mask == (xsaveopt_enabled ? 0 : 4), "Got unexpected Mask %#I64x.\n", xs->Mask); + ok((xs->Mask & 7) == (xsaveopt_enabled ? 0 : 4), "Got unexpected Mask %#I64x.\n", xs->Mask); /* The save area has garbage if xsaveopt is available, so we can't test * its contents. */
diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index 068eb9d6839..77bfdb21bba 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -250,8 +250,8 @@ BOOL xstate_compaction_enabled = FALSE; UINT64 xstate_supported_features_mask; UINT64 xstate_features_size;
-static int xstate_feature_offset[64] = {0, 0, 576}; -static int xstate_feature_size[64] = {0, 0, 256}; +static int xstate_feature_offset[64]; +static int xstate_feature_size[64]; static UINT64 xstate_aligned_features;
static int next_xstate_offset( int off, UINT64 compaction_mask, int feature_idx ) @@ -403,8 +403,11 @@ static void get_cpuid_name( char *buffer )
static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) { + static const ULONG64 wine_xstate_supported_features = 0xff; /* XSTATE_AVX, XSTATE_MPX_BNDREGS, XSTATE_MPX_BNDCSR, + * XSTATE_AVX512_KMASK, XSTATE_AVX512_ZMM_H, XSTATE_AVX512_ZMM */ unsigned int regs[4], regs2[4], regs3[4]; ULONGLONG features; + unsigned int i;
#if defined(__i386__) info->ProcessorArchitecture = PROCESSOR_ARCHITECTURE_INTEL; @@ -452,15 +455,39 @@ static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info )
if (features & CPU_FEATURE_XSAVE) { + ULONG64 xcr0; + do_cpuid( 0x0000000d, 1, regs3 ); /* get XSAVE details */ if (regs3[0] & 2) xstate_compaction_enabled = TRUE; - xstate_supported_features_mask = 3; - if (features & CPU_FEATURE_AVX) - xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX; + + do_cpuid( 0x0000000d, 0, regs3 ); /* get user xstate features */ + xstate_supported_features_mask = ((ULONG64)regs3[3] << 32) | regs3[0]; + __asm__ volatile + ( + "xorl %%ecx,%%ecx\n\t" + "xgetbv\n\t" + "movl %%eax,%0\n\t" + "movl %%edx,%1\n\t" + : "=m"(regs3[0]), "=m"(regs3[1]) : : "eax", "ecx", "edx" + ); + xcr0 = ((ULONG64)regs3[1] << 32) | regs3[0]; + xstate_supported_features_mask &= xcr0 & wine_xstate_supported_features; + TRACE("xcr0 %#llx, wine_xstate_supported_features %#llx, xstate_supported_features_mask %#llx.\n", + (long long)xcr0, (long long)wine_xstate_supported_features, (long long)xstate_supported_features_mask); + for (i = 2; i < 64; ++i) + { + if (!(xstate_supported_features_mask & ((ULONG64)1 << i))) continue; + do_cpuid( 0x0000000d, i, regs3 ); /* get user xstate features */ + xstate_feature_offset[i] = regs3[1]; + xstate_feature_size[i] = regs3[0]; + if (regs3[2] & 2) xstate_aligned_features |= (ULONG64)1 << i; + TRACE("xstate[%d] offset %d, size %d, aligned %d.\n", i, xstate_feature_offset[i], xstate_feature_size[i], !!(regs3[2] & 2)); + } xstate_features_size = xstate_get_size( xstate_compaction_enabled ? 0x8000000000000000 | xstate_supported_features_mask : 0, xstate_supported_features_mask ) - sizeof(XSAVE_AREA_HEADER); xstate_features_size = (xstate_features_size + 15) & ~15; + TRACE("xstate_features_size %lld.\n", (long long)xstate_features_size); }
if (regs[1] == AUTH && regs[3] == ENTI && regs[2] == CAMD) diff --git a/programs/wineboot/wineboot.c b/programs/wineboot/wineboot.c index 1c1ad858fa4..4876f4ac420 100644 --- a/programs/wineboot/wineboot.c +++ b/programs/wineboot/wineboot.c @@ -195,8 +195,11 @@ static DWORD set_reg_value_dword( HKEY hkey, const WCHAR *name, DWORD value )
static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data) { + static const ULONG64 wine_xstate_supported_features = 0xfc; /* XSTATE_AVX, XSTATE_MPX_BNDREGS, XSTATE_MPX_BNDCSR, + * XSTATE_AVX512_KMASK, XSTATE_AVX512_ZMM_H, XSTATE_AVX512_ZMM */ XSTATE_CONFIGURATION *xstate = &data->XState; - unsigned int i; + ULONG64 supported_mask, xcr0; + unsigned int i, off; int regs[4];
if (!data->ProcessorFeatures[PF_AVX_INSTRUCTIONS_AVAILABLE]) @@ -215,29 +218,49 @@ static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data)
__cpuidex(regs, 0xd, 0); TRACE("XSAVE details %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]); - if (!(regs[0] & XSTATE_AVX)) + supported_mask = ((ULONG64)regs[3] << 32) | (unsigned int)regs[0]; + __asm__ volatile + ( + "xorl %%ecx,%%ecx\n\t" + "xgetbv\n\t" + "movl %%eax,%0\n\t" + "movl %%edx,%1\n\t" + : "=m"(regs[2]), "=m"(regs[3]) : : "eax", "ecx", "edx" + ); + xcr0 = ((ULONG64)regs[3] << 32) | (unsigned int)regs[2]; /* xcr0 is read into regs[2]/regs[3] so that regs[1] keeps the cpuid 0xd value assigned to AllFeatureSize below; regs is int[], hence the unsigned casts on the low dwords */ + supported_mask &= xcr0 & wine_xstate_supported_features; + if (!(supported_mask >> 2)) return;
- xstate->EnabledFeatures = (1 << XSTATE_LEGACY_FLOATING_POINT) | (1 << XSTATE_LEGACY_SSE) | (1 << XSTATE_AVX); + xstate->EnabledFeatures = (1 << XSTATE_LEGACY_FLOATING_POINT) | (1 << XSTATE_LEGACY_SSE) | supported_mask; xstate->EnabledVolatileFeatures = xstate->EnabledFeatures; - xstate->Size = sizeof(XSAVE_FORMAT) + sizeof(XSTATE); xstate->AllFeatureSize = regs[1]; - xstate->AllFeatures[0] = offsetof(XSAVE_FORMAT, XmmRegisters); - xstate->AllFeatures[1] = sizeof(M128A) * 16; - xstate->AllFeatures[2] = sizeof(YMMCONTEXT); - - for (i = 0; i < 3; ++i) - xstate->Features[i].Size = xstate->AllFeatures[i]; - - xstate->Features[1].Offset = xstate->Features[0].Size; - xstate->Features[2].Offset = sizeof(XSAVE_FORMAT) + offsetof(XSTATE, YmmContext);
__cpuidex(regs, 0xd, 1); xstate->OptimizedSave = regs[0] & 1; xstate->CompactionEnabled = !!(regs[0] & 2);
- __cpuidex(regs, 0xd, 2); - TRACE("XSAVE feature 2 %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]); + xstate->Features[0].Size = xstate->AllFeatures[0] = offsetof(XSAVE_FORMAT, XmmRegisters); + xstate->Features[1].Size = xstate->AllFeatures[1] = sizeof(M128A) * 16; + xstate->Features[1].Offset = xstate->Features[0].Size; + off = sizeof(XSAVE_FORMAT) + sizeof(XSAVE_AREA_HEADER); + supported_mask >>= 2; + for (i = 2; supported_mask; ++i, supported_mask >>= 1) + { + if (!(supported_mask & 1)) continue; + __cpuidex( regs, 0xd, i ); + xstate->Features[i].Offset = regs[1]; + xstate->Features[i].Size = xstate->AllFeatures[i] = regs[0]; + if (regs[2] & 2) + { + xstate->AlignedFeatures |= (ULONG64)1 << i; + off = (off + 63) & ~63; + } + off += xstate->Features[i].Size; + TRACE("xstate[%d] offset %lu, size %lu, aligned %d.\n", i, xstate->Features[i].Offset, xstate->Features[i].Size, !!(regs[2] & 2)); + } + xstate->Size = xstate->CompactionEnabled ? off : xstate->Features[i - 1].Offset + xstate->Features[i - 1].Size; + TRACE("xstate size %lu, compacted %d, optimized %d.\n", xstate->Size, xstate->CompactionEnabled, xstate->OptimizedSave); }
static BOOL is_tsc_trusted_by_the_kernel(void)
From: Paul Gofman <pgofman@codeweavers.com>
--- dlls/ntdll/tests/exception.c | 100 +++++++++++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 10 deletions(-)
diff --git a/dlls/ntdll/tests/exception.c b/dlls/ntdll/tests/exception.c index 0736da2ee8f..40ab893ecc3 100644 --- a/dlls/ntdll/tests/exception.c +++ b/dlls/ntdll/tests/exception.c @@ -9061,11 +9061,12 @@ static const unsigned test_extended_context_spoil_data2[8] = {0x15, 0x25, 0x35,
static BOOL test_extended_context_modified_state; static BOOL xsaveopt_enabled, compaction_enabled; +static ULONG64 xstate_supported_features;
static DWORD test_extended_context_handler(EXCEPTION_RECORD *rec, EXCEPTION_REGISTRATION_RECORD *frame, CONTEXT *context, EXCEPTION_REGISTRATION_RECORD **dispatcher) { - static const ULONG64 expected_compaction_mask = 0x8000000000000004; + const ULONG64 expected_compaction_mask = (0x8000000000000000 | xstate_supported_features) & ~(ULONG64)3; CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1); unsigned int *context_ymm_data; DWORD expected_min_offset; @@ -9103,7 +9104,7 @@ static DWORD test_extended_context_handler(EXCEPTION_RECORD *rec, EXCEPTION_REGI
if (compaction_enabled) ok((xs->CompactionMask & (expected_compaction_mask | 3)) == expected_compaction_mask, - "Got compaction mask %#I64x.\n", xs->CompactionMask); + "Got compaction mask %#I64x, expected %#I64x.\n", xs->CompactionMask, expected_compaction_mask); else ok(!xs->CompactionMask, "Got compaction mask %#I64x.\n", xs->CompactionMask);
@@ -9398,7 +9399,8 @@ static void test_extended_context(void) }; const struct context_parameters *context_arch;
- const ULONG64 supported_features = 7, supported_compaction_mask = supported_features | ((ULONG64)1 << 63); + const ULONG64 supported_features = 0xff; + const ULONG64 supported_compaction_mask = supported_features | ((ULONG64)1 << 63); ULONG expected_length, expected_length_xstate, context_flags, expected_offset, max_xstate_length; ULONG64 enabled_features, expected_compaction; DECLSPEC_ALIGN(64) BYTE context_buffer2[4096]; @@ -9438,6 +9440,7 @@ static void test_extended_context(void) compaction_enabled = regs[0] & 2; } #endif + xstate_supported_features = enabled_features & supported_features;
/* Test context manipulation functions. */ length = 0xdeadbeef; @@ -9908,7 +9911,7 @@ static void test_extended_context(void)
xs->Mask = 0xdeadbeef; xs->CompactionMask = 0xdeadbeef; - bret = pSetXStateFeaturesMask(context, 7); + bret = pSetXStateFeaturesMask(context, xstate_supported_features); ok(bret == !!(flags & CONTEXT_NATIVE), "Got unexpected bret %#x.\n", bret); context_flags = *(DWORD *)(context_buffer + context_arch[test].flags_offset); ok(context_flags == (bret ? flags_fpx : flags), @@ -9922,8 +9925,8 @@ static void test_extended_context(void) mask = 0xdeadbeef; bret = pGetXStateFeaturesMask(context, &mask); if (flags & CONTEXT_NATIVE) - ok(bret && mask == (enabled_features & supported_features), - "Got unexpected bret %#x, mask %s, flags %#lx.\n", bret, wine_dbgstr_longlong(mask), flags); + ok(bret && mask == xstate_supported_features, + "Got unexpected bret %#x, mask %s, flags %#lx (enabled_features & supported_features %#I64x).\n", bret, wine_dbgstr_longlong(mask), flags, xstate_supported_features); else ok(!bret && mask == 0xdeadbeef, "Got unexpected bret %#x, mask %s, flags %#lx.\n", bret, wine_dbgstr_longlong(mask), flags); @@ -10148,6 +10151,14 @@ static void test_extended_context(void) &context, &length); memset(&xs->YmmContext, 0xcc, sizeof(xs->YmmContext)); ok(bret, "Got unexpected bret %#x.\n", bret); + + /* clear potentially leftover xstate */ + pSetXStateFeaturesMask(context, 0); + context->ContextFlags = CONTEXT_XSTATE; + SetThreadContext(GetCurrentThread(), context); + + context->ContextFlags = CONTEXT_FULL | CONTEXT_XSTATE | CONTEXT_FLOATING_POINT; + pSetXStateFeaturesMask(context, ~(ULONG64)0); *(void **)(call_func_code_reset_ymm_state + call_func_offsets.func_addr) = GetThreadContext; *(void **)(call_func_code_reset_ymm_state + call_func_offsets.func_param1) = (void *)GetCurrentThread(); @@ -10165,12 +10176,12 @@ static void test_extended_context(void) ok(context->ContextFlags == expected_flags, "Got unexpected ContextFlags %#lx.\n", context->ContextFlags);
- expected_compaction = compaction_enabled ? ((ULONG64)1 << 63) | 4 : 0; + expected_compaction = compaction_enabled ? ((ULONG64)1 << 63) | (xstate_supported_features & ~(UINT64)3) : 0;
xs = (XSTATE *)((BYTE *)context_ex + context_ex->XState.Offset); ok((xs->Mask & supported_features) == (xsaveopt_enabled ? 0 : 4), "Got unexpected Mask %#I64x.\n", xs->Mask); ok((xs->CompactionMask & (supported_features | ((ULONG64)1 << 63))) == expected_compaction, - "Got unexpected CompactionMask %s.\n", wine_dbgstr_longlong(xs->CompactionMask)); + "Got unexpected CompactionMask %s (expected %#I64x).\n", wine_dbgstr_longlong(xs->CompactionMask), expected_compaction);
for (i = 4; i < 8; ++i) ok(!data[i], "Got unexpected data %#x, i %u.\n", data[i], i); @@ -10180,6 +10191,36 @@ static void test_extended_context(void) || broken(((ULONG *)&xs->YmmContext)[i] == test_extended_context_data[i + 4]), "Got unexpected data %#lx, i %u.\n", ((ULONG *)&xs->YmmContext)[i], i);
+ /* Test setting context which has only part of xstate in CompactionMask. */ + if (compaction_enabled && enabled_features & ((ULONG64)1 << XSTATE_AVX512_KMASK)) + { + *(void **)(call_func_code_set_ymm0 + call_func_offsets.func_addr) = SetThreadContext; + *(void **)(call_func_code_set_ymm0 + call_func_offsets.func_param1) = (void *)GetCurrentThread(); + *(void **)(call_func_code_set_ymm0 + call_func_offsets.func_param2) = context; + *(void **)(call_func_code_set_ymm0 + call_func_offsets.ymm0_save) = data; + memcpy(code_mem, call_func_code_set_ymm0, sizeof(call_func_code_set_ymm0)); + context->ContextFlags = CONTEXT_XSTATE; + xs->CompactionMask = 0x8000000000000000 | ((ULONG64)1 << XSTATE_AVX512_KMASK); + xs->Mask = 0; + memcpy(data, test_extended_context_data, sizeof(data)); + bret = func(); + ok(bret, "Got unexpected bret %#x, GetLastError() %lu.\n", bret, GetLastError()); + /* Setting a context with only part of xstate in CompactionMask doesn't change missing parts. */ + for (i = 4; i < 8; ++i) + ok(data[i] == test_extended_context_data[i], "Got unexpected data %#x, i %u.\n", data[i], i); + + memcpy(data, test_extended_context_data, sizeof(data)); + xs->CompactionMask |= XSTATE_MASK_GSSE; + bret = func(); + ok(bret, "Got unexpected bret %#x, GetLastError() %lu.\n", bret, GetLastError()); + for (i = 4; i < 8; ++i) + ok(!data[i], "Got unexpected data %#x, i %u.\n", data[i], i); + } + else + { + skip("avx512 is not available, skipping test.\n"); + } + /* Test fault exception context. */ memset(data, 0xff, sizeof(data)); xs->Mask = 0; @@ -10216,9 +10257,10 @@ static void test_extended_context(void) bret = GetThreadContext(thread, context); ok(bret, "Got unexpected bret %#x, GetLastError() %lu.\n", bret, GetLastError()); todo_wine_if (!xsaveopt_enabled) - ok((xs->Mask & supported_features) == (xsaveopt_enabled ? 0 : 4), "Got unexpected Mask %#I64x.\n", xs->Mask); + ok((xs->Mask & supported_features) == (xsaveopt_enabled ? 
0 : 4), "Got unexpected Mask %#I64x.\n", xs->Mask); ok((xs->CompactionMask & supported_compaction_mask) == expected_compaction, - "Got unexpected CompactionMask %s.\n", wine_dbgstr_longlong(xs->CompactionMask)); + "Got unexpected CompactionMask %I64x, expected %I64x.\n", xs->CompactionMask, + expected_compaction);
for (i = 0; i < 16 * 4; ++i) ok(((ULONG *)&xs->YmmContext)[i] == ((xs->Mask & 4) ? 0 : 0xcccccccc), @@ -10299,6 +10341,44 @@ static void test_extended_context(void) "Got unexpected value %#lx, i %u.\n", ((ULONG *)&xs->YmmContext)[i], i); }
+ if (compaction_enabled && enabled_features & ((ULONG64)1 << XSTATE_AVX512_KMASK)) + { + ULONG64 saved_mask; + ULONG *d; + + saved_mask = xs->CompactionMask; + xs->Mask = XSTATE_MASK_GSSE; + xs->CompactionMask = 0x8000000000000000 | xs->Mask; + *(ULONG *)&xs->YmmContext = 0x11111111; + bret = SetThreadContext(thread, context); + ok(bret, "Got unexpected bret %#x, GetLastError() %lu.\n", bret, GetLastError()); + + xs->Mask = (ULONG64)1 << XSTATE_AVX512_KMASK; + xs->CompactionMask = 0x8000000000000000 | xs->Mask; + *(ULONG *)&xs->YmmContext = 0x22222222; + bret = SetThreadContext(thread, context); + ok(bret, "Got unexpected bret %#x, GetLastError() %lu.\n", bret, GetLastError()); + + xs->CompactionMask = saved_mask; + bret = GetThreadContext(thread, context); + ok(bret, "Got unexpected bret %#x, GetLastError() %lu.\n", bret, GetLastError()); + + todo_wine_if(xs->Mask == XSTATE_MASK_GSSE) + ok((xs->Mask & (XSTATE_MASK_GSSE | ((ULONG64)1 << XSTATE_AVX512_KMASK))) + == (XSTATE_MASK_GSSE | ((ULONG64)1 << XSTATE_AVX512_KMASK)), "got Mask %#I64x.\n", xs->Mask); + d = pLocateXStateFeature(context, XSTATE_AVX, NULL); + ok(!!d, "Got NULL.\n"); + ok(*d == 0x11111111, "got %#lx.\n", *d); + + d = pLocateXStateFeature(context, XSTATE_AVX512_KMASK, NULL); + ok(!!d, "Got NULL.\n"); + todo_wine ok(*d == 0x22222222, "got %#lx.\n", *d); + } + else + { + skip("avx512 is not available, skipping test.\n"); + } + bret = ResumeThread(thread); ok(bret, "Got unexpected bret %#x, GetLastError() %lu.\n", bret, GetLastError());
Jacek Caban (@jacek) commented about dlls/ntdll/unix/system.c:
do_cpuid( 0x0000000d, 1, regs3 ); /* get XSAVE details */ if (regs3[0] & 2) xstate_compaction_enabled = TRUE;
xstate_supported_features_mask = 3;
if (features & CPU_FEATURE_AVX)
xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX;
do_cpuid( 0x0000000d, 0, regs3 ); /* get user xstate features */
xstate_supported_features_mask = ((ULONG64)regs3[3] << 32) | regs3[0];
__asm__ volatile
(
"xorl %%ecx,%%ecx\n\t"
"xgetbv\n\t"
"movl %%eax,%0\n\t"
"movl %%edx,%1\n\t"
: "=m"(regs3[0]), "=m"(regs3[1]) : : "eax", "ecx", "edx"
);
It would be nice to avoid inline assembly like that. I guess we can't use the builtin `_xgetbv` for compatibility reasons, but we could have a local implementation of it, similar to `do_cpuid`.
Jacek Caban (@jacek) commented about programs/wineboot/wineboot.c:
__cpuidex(regs, 0xd, 0); TRACE("XSAVE details %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]);
- if (!(regs[0] & XSTATE_AVX))
- supported_mask = ((ULONG64)regs[3] << 32) | regs[0];
- __asm__ volatile
- (
"xorl %%ecx,%%ecx\n\t"
"xgetbv\n\t"
"movl %%eax,%0\n\t"
"movl %%edx,%1\n\t"
: "=m"(regs[0]), "=m"(regs[1]) : : "eax", "ecx", "edx"
- );
Similar to ntdll, we could avoid inline assembly here. For the PE parts, we'd ideally provide it via `intrin.h`, but a local implementation would do too, I think.