 
            Signed-off-by: Paul Gofman pgofman@codeweavers.com --- include/ddk/wdm.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/include/ddk/wdm.h b/include/ddk/wdm.h index 9fcb1387012..b8bb7bd971f 100644 --- a/include/ddk/wdm.h +++ b/include/ddk/wdm.h @@ -1207,16 +1207,24 @@ typedef enum _ALTERNATIVE_ARCHITECTURE_TYPE
typedef struct _XSTATE_FEATURE { - ULONG Offset; - ULONG Size; + ULONG Offset; + ULONG Size; } XSTATE_FEATURE, *PXSTATE_FEATURE;
typedef struct _XSTATE_CONFIGURATION { - ULONG64 EnabledFeatures; - ULONG Size; - ULONG OptimizedSave:1; - XSTATE_FEATURE Features[MAXIMUM_XSTATE_FEATURES]; + ULONG64 EnabledFeatures; + ULONG64 EnabledVolatileFeatures; + ULONG Size; + ULONG OptimizedSave:1; + ULONG CompactionEnabled:1; + XSTATE_FEATURE Features[MAXIMUM_XSTATE_FEATURES]; + + ULONG64 EnabledSupervisorFeatures; + ULONG64 AlignedFeatures; + ULONG AllFeatureSize; + ULONG AllFeatures[MAXIMUM_XSTATE_FEATURES]; + ULONG64 EnabledUserVisibleSupervisorFeatures; } XSTATE_CONFIGURATION, *PXSTATE_CONFIGURATION;
typedef struct _KUSER_SHARED_DATA {
 
            MxCsr will be overwritten anyway by the subsequent fxrstor.
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/unix/signal_x86_64.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 6f01b76c5ae..38a8093abf9 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -1483,7 +1483,6 @@ __ASM_GLOBAL_FUNC( set_full_cpu_context, __ASM_SEH(".seh_stackalloc 0x40\n\t") __ASM_SEH(".seh_endprologue\n\t") __ASM_CFI(".cfi_adjust_cfa_offset 40\n\t") - "ldmxcsr 0x34(%rdi)\n\t" /* context->MxCsr */ "movw 0x38(%rdi),%ax\n\t" /* context->SegCs */ "movq %rax,8(%rsp)\n\t" "movw 0x42(%rdi),%ax\n\t" /* context->SegSs */
 
            The structure is defined for both x86_64 and i386 (along with M128A) in the (newer) Windows SDK.
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/signal_i386.c | 32 ++---------------- dlls/ntdll/unix/signal_i386.c | 53 +++++++---------------------- dlls/ntdll/unix/system.c | 26 +------------- include/winnt.h | 42 ++++++++++++----------- programs/winedbg/be_i386.c | 64 +++++++++++------------------------ 5 files changed, 57 insertions(+), 160 deletions(-)
diff --git a/dlls/ntdll/signal_i386.c b/dlls/ntdll/signal_i386.c index b65beb7215b..5e8bc45e3d9 100644 --- a/dlls/ntdll/signal_i386.c +++ b/dlls/ntdll/signal_i386.c @@ -36,34 +36,6 @@
WINE_DEFAULT_DEBUG_CHANNEL(seh);
-/* not defined for x86, so copy the x86_64 definition */ -typedef struct DECLSPEC_ALIGN(16) _M128A -{ - ULONGLONG Low; - LONGLONG High; -} M128A; - -typedef struct -{ - WORD ControlWord; - WORD StatusWord; - BYTE TagWord; - BYTE Reserved1; - WORD ErrorOpcode; - DWORD ErrorOffset; - WORD ErrorSelector; - WORD Reserved2; - DWORD DataOffset; - WORD DataSelector; - WORD Reserved3; - DWORD MxCsr; - DWORD MxCsr_Mask; - M128A FloatRegisters[8]; - M128A XmmRegisters[16]; - BYTE Reserved4[96]; -} XMM_SAVE_AREA32; - - struct x86_thread_data { DWORD fs; /* 1d4 TEB selector */ @@ -287,8 +259,8 @@ static inline void save_fpux( CONTEXT *context ) { #ifdef __GNUC__ /* we have to enforce alignment by hand */ - char buffer[sizeof(XMM_SAVE_AREA32) + 16]; - XMM_SAVE_AREA32 *state = (XMM_SAVE_AREA32 *)(((ULONG_PTR)buffer + 15) & ~15); + char buffer[sizeof(XSAVE_FORMAT) + 16]; + XSAVE_FORMAT *state = (XSAVE_FORMAT *)(((ULONG_PTR)buffer + 15) & ~15);
context->ContextFlags |= CONTEXT_EXTENDED_REGISTERS; __asm__ __volatile__( "fxsave %0" : "=m" (*state) ); diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 39c154ebbc1..bf4922e98f5 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -65,33 +65,6 @@ WINE_DEFAULT_DEBUG_CHANNEL(seh);
#undef ERR /* Solaris needs to define this */
-/* not defined for x86, so copy the x86_64 definition */ -typedef struct DECLSPEC_ALIGN(16) _M128A -{ - ULONGLONG Low; - LONGLONG High; -} M128A; - -typedef struct -{ - WORD ControlWord; - WORD StatusWord; - BYTE TagWord; - BYTE Reserved1; - WORD ErrorOpcode; - DWORD ErrorOffset; - WORD ErrorSelector; - WORD Reserved2; - DWORD DataOffset; - WORD DataSelector; - WORD Reserved3; - DWORD MxCsr; - DWORD MxCsr_Mask; - M128A FloatRegisters[8]; - M128A XmmRegisters[16]; - BYTE Reserved4[96]; -} XMM_SAVE_AREA32; - /*********************************************************************** * signal context platform-specific definitions */ @@ -171,7 +144,7 @@ typedef struct ucontext #define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR])
#define FPU_sig(context) ((FLOATING_SAVE_AREA*)((context)->uc_mcontext.fpregs)) -#define FPUX_sig(context) (FPU_sig(context) && !((context)->uc_mcontext.fpregs->status >> 16) ? (XMM_SAVE_AREA32 *)(FPU_sig(context) + 1) : NULL) +#define FPUX_sig(context) (FPU_sig(context) && !((context)->uc_mcontext.fpregs->status >> 16) ? (XSAVE_FORMAT *)(FPU_sig(context) + 1) : NULL)
#ifdef __ANDROID__ /* custom signal restorer since we may have unmapped the one in vdso, and bionic doesn't check for that */ @@ -336,7 +309,7 @@ static inline int set_thread_area( struct modify_ldt_s *ptr ) #define TRAP_sig(context) ((context)->uc_mcontext->__es.__trapno) #define ERROR_sig(context) ((context)->uc_mcontext->__es.__err) #define FPU_sig(context) NULL -#define FPUX_sig(context) ((XMM_SAVE_AREA32 *)&(context)->uc_mcontext->__fs.__fpu_fcw) +#define FPUX_sig(context) ((XSAVE_FORMAT *)&(context)->uc_mcontext->__fs.__fpu_fcw) #else #define EAX_sig(context) ((context)->uc_mcontext->ss.eax) #define EBX_sig(context) ((context)->uc_mcontext->ss.ebx) @@ -357,7 +330,7 @@ static inline int set_thread_area( struct modify_ldt_s *ptr ) #define TRAP_sig(context) ((context)->uc_mcontext->es.trapno) #define ERROR_sig(context) ((context)->uc_mcontext->es.err) #define FPU_sig(context) NULL -#define FPUX_sig(context) ((XMM_SAVE_AREA32 *)&(context)->uc_mcontext->fs.fpu_fcw) +#define FPUX_sig(context) ((XSAVE_FORMAT *)&(context)->uc_mcontext->fs.fpu_fcw) #endif
#elif defined(__NetBSD__) @@ -387,7 +360,7 @@ static inline int set_thread_area( struct modify_ldt_s *ptr ) #define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR])
#define FPU_sig(context) NULL -#define FPUX_sig(context) ((XMM_SAVE_AREA32 *)&((context)->uc_mcontext.__fpregs)) +#define FPUX_sig(context) ((XSAVE_FORMAT *)&((context)->uc_mcontext.__fpregs))
#define T_MCHK T_MCA #define T_XMMFLT T_XMM @@ -687,8 +660,8 @@ static inline void save_fpu( CONTEXT *context ) static inline void save_fpux( CONTEXT *context ) { /* we have to enforce alignment by hand */ - char buffer[sizeof(XMM_SAVE_AREA32) + 16]; - XMM_SAVE_AREA32 *state = (XMM_SAVE_AREA32 *)(((ULONG_PTR)buffer + 15) & ~15); + char buffer[sizeof(XSAVE_FORMAT) + 16]; + XSAVE_FORMAT *state = (XSAVE_FORMAT *)(((ULONG_PTR)buffer + 15) & ~15);
context->ContextFlags |= CONTEXT_EXTENDED_REGISTERS; __asm__ __volatile__( "fxsave %0" : "=m" (*state) ); @@ -718,8 +691,8 @@ static inline void restore_fpu( const CONTEXT *context ) static inline void restore_fpux( const CONTEXT *context ) { /* we have to enforce alignment by hand */ - char buffer[sizeof(XMM_SAVE_AREA32) + 16]; - XMM_SAVE_AREA32 *state = (XMM_SAVE_AREA32 *)(((ULONG_PTR)buffer + 15) & ~15); + char buffer[sizeof(XSAVE_FORMAT) + 16]; + XSAVE_FORMAT *state = (XSAVE_FORMAT *)(((ULONG_PTR)buffer + 15) & ~15);
memcpy( state, context->ExtendedRegisters, sizeof(*state) ); /* reset the current interrupt status */ @@ -733,7 +706,7 @@ static inline void restore_fpux( const CONTEXT *context ) * * Build a standard FPU context from an extended one. */ -static void fpux_to_fpu( FLOATING_SAVE_AREA *fpu, const XMM_SAVE_AREA32 *fpux ) +static void fpux_to_fpu( FLOATING_SAVE_AREA *fpu, const XSAVE_FORMAT *fpux ) { unsigned int i, tag, stack_top;
@@ -782,7 +755,7 @@ static void fpux_to_fpu( FLOATING_SAVE_AREA *fpu, const XMM_SAVE_AREA32 *fpux ) static inline void save_context( CONTEXT *context, const ucontext_t *sigcontext ) { FLOATING_SAVE_AREA *fpu = FPU_sig(sigcontext); - XMM_SAVE_AREA32 *fpux = FPUX_sig(sigcontext); + XSAVE_FORMAT *fpux = FPUX_sig(sigcontext);
memset(context, 0, sizeof(*context)); context->ContextFlags = CONTEXT_FULL | CONTEXT_DEBUG_REGISTERS; @@ -832,7 +805,7 @@ static inline void save_context( CONTEXT *context, const ucontext_t *sigcontext static inline void restore_context( const CONTEXT *context, ucontext_t *sigcontext ) { FLOATING_SAVE_AREA *fpu = FPU_sig(sigcontext); - XMM_SAVE_AREA32 *fpux = FPUX_sig(sigcontext); + XSAVE_FORMAT *fpux = FPUX_sig(sigcontext);
x86_thread_data()->dr0 = context->Dr0; x86_thread_data()->dr1 = context->Dr1; @@ -2234,8 +2207,8 @@ static void init_thread_context( CONTEXT *context, LPTHREAD_START_ROUTINE entry, context->Esp = (DWORD)NtCurrentTeb()->Tib.StackBase - 16; context->Eip = (DWORD)relay; context->FloatSave.ControlWord = 0x27f; - ((XMM_SAVE_AREA32 *)context->ExtendedRegisters)->ControlWord = 0x27f; - ((XMM_SAVE_AREA32 *)context->ExtendedRegisters)->MxCsr = 0x1f80; + ((XSAVE_FORMAT *)context->ExtendedRegisters)->ControlWord = 0x27f; + ((XSAVE_FORMAT *)context->ExtendedRegisters)->MxCsr = 0x1f80; }
diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index 9d82a6774c6..c2412643afa 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -240,33 +240,9 @@ static int have_cpuid(void) static inline BOOL have_sse_daz_mode(void) { #ifdef __i386__ - typedef struct DECLSPEC_ALIGN(16) _M128A { - ULONGLONG Low; - LONGLONG High; - } M128A; - - typedef struct _XMM_SAVE_AREA32 { - WORD ControlWord; - WORD StatusWord; - BYTE TagWord; - BYTE Reserved1; - WORD ErrorOpcode; - DWORD ErrorOffset; - WORD ErrorSelector; - WORD Reserved2; - DWORD DataOffset; - WORD DataSelector; - WORD Reserved3; - DWORD MxCsr; - DWORD MxCsr_Mask; - M128A FloatRegisters[8]; - M128A XmmRegisters[16]; - BYTE Reserved4[96]; - } XMM_SAVE_AREA32; - /* Intel says we need a zeroed 16-byte aligned buffer */ char buffer[512 + 16]; - XMM_SAVE_AREA32 *state = (XMM_SAVE_AREA32 *)(((ULONG_PTR)buffer + 15) & ~15); + XSAVE_FORMAT *state = (XSAVE_FORMAT *)(((ULONG_PTR)buffer + 15) & ~15); memset(buffer, 0, sizeof(buffer));
__asm__ __volatile__( "fxsave %0" : "=m" (*state) : "m" (*state) ); diff --git a/include/winnt.h b/include/winnt.h index ac89ebc5ef7..38f7983cbf7 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -1060,30 +1060,12 @@ typedef struct _LDT_ENTRY { } HighWord; } LDT_ENTRY, *PLDT_ENTRY, WOW64_LDT_ENTRY, *PWOW64_LDT_ENTRY;
-/* x86-64 context definitions */ -#if defined(__x86_64__) - -#define CONTEXT_AMD64 0x00100000 - -#define CONTEXT_CONTROL (CONTEXT_AMD64 | 0x0001) -#define CONTEXT_INTEGER (CONTEXT_AMD64 | 0x0002) -#define CONTEXT_SEGMENTS (CONTEXT_AMD64 | 0x0004) -#define CONTEXT_FLOATING_POINT (CONTEXT_AMD64 | 0x0008) -#define CONTEXT_DEBUG_REGISTERS (CONTEXT_AMD64 | 0x0010) -#define CONTEXT_XSTATE (CONTEXT_AMD64 | 0x0040) -#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) -#define CONTEXT_ALL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT | CONTEXT_DEBUG_REGISTERS) - -#define EXCEPTION_READ_FAULT 0 -#define EXCEPTION_WRITE_FAULT 1 -#define EXCEPTION_EXECUTE_FAULT 8 - typedef struct DECLSPEC_ALIGN(16) _M128A { ULONGLONG Low; LONGLONG High; } M128A, *PM128A;
-typedef struct _XMM_SAVE_AREA32 { +typedef struct _XSAVE_FORMAT { WORD ControlWord; /* 000 */ WORD StatusWord; /* 002 */ BYTE TagWord; /* 004 */ @@ -1100,7 +1082,27 @@ typedef struct _XMM_SAVE_AREA32 { M128A FloatRegisters[8]; /* 020 */ M128A XmmRegisters[16]; /* 0a0 */ BYTE Reserved4[96]; /* 1a0 */ -} XMM_SAVE_AREA32, *PXMM_SAVE_AREA32; +} XSAVE_FORMAT, *PXSAVE_FORMAT; + +/* x86-64 context definitions */ +#if defined(__x86_64__) + +#define CONTEXT_AMD64 0x00100000 + +#define CONTEXT_CONTROL (CONTEXT_AMD64 | 0x0001) +#define CONTEXT_INTEGER (CONTEXT_AMD64 | 0x0002) +#define CONTEXT_SEGMENTS (CONTEXT_AMD64 | 0x0004) +#define CONTEXT_FLOATING_POINT (CONTEXT_AMD64 | 0x0008) +#define CONTEXT_DEBUG_REGISTERS (CONTEXT_AMD64 | 0x0010) +#define CONTEXT_XSTATE (CONTEXT_AMD64 | 0x0040) +#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) +#define CONTEXT_ALL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT | CONTEXT_DEBUG_REGISTERS) + +#define EXCEPTION_READ_FAULT 0 +#define EXCEPTION_WRITE_FAULT 1 +#define EXCEPTION_EXECUTE_FAULT 8 + +typedef XSAVE_FORMAT XMM_SAVE_AREA32, *PXMM_SAVE_AREA32;
typedef struct DECLSPEC_ALIGN(16) _CONTEXT { DWORD64 P1Home; /* 000 */ diff --git a/programs/winedbg/be_i386.c b/programs/winedbg/be_i386.c index e6fd4357cc9..427d76a35ad 100644 --- a/programs/winedbg/be_i386.c +++ b/programs/winedbg/be_i386.c @@ -33,32 +33,6 @@ extern void be_i386_disasm_one_insn(ADDRESS64* addr, int display);
#define IS_VM86_MODE(ctx) (ctx->EFlags & V86_FLAG)
-#ifndef __x86_64__ -typedef struct DECLSPEC_ALIGN(16) _M128A { - ULONGLONG Low; - LONGLONG High; -} M128A, *PM128A; - -typedef struct _XMM_SAVE_AREA32 { - WORD ControlWord; /* 000 */ - WORD StatusWord; /* 002 */ - BYTE TagWord; /* 004 */ - BYTE Reserved1; /* 005 */ - WORD ErrorOpcode; /* 006 */ - DWORD ErrorOffset; /* 008 */ - WORD ErrorSelector; /* 00c */ - WORD Reserved2; /* 00e */ - DWORD DataOffset; /* 010 */ - WORD DataSelector; /* 014 */ - WORD Reserved3; /* 016 */ - DWORD MxCsr; /* 018 */ - DWORD MxCsr_Mask; /* 01c */ - M128A FloatRegisters[8]; /* 020 */ - M128A XmmRegisters[16]; /* 0a0 */ - BYTE Reserved4[96]; /* 1a0 */ -} XMM_SAVE_AREA32, *PXMM_SAVE_AREA32; -#endif - static ADDRESS_MODE get_selector_type(HANDLE hThread, const WOW64_CONTEXT *ctx, WORD sel) { LDT_ENTRY le; @@ -158,7 +132,7 @@ static void be_i386_all_print_context(HANDLE hThread, const dbg_ctx_t *pctx) static const char mxcsr_flags[16][4] = { "IE", "DE", "ZE", "OE", "UE", "PE", "DAZ", "IM", "DM", "ZM", "OM", "UM", "PM", "R-", "R+", "FZ" }; const WOW64_CONTEXT *ctx = &pctx->x86; - XMM_SAVE_AREA32 *xmm_area; + XSAVE_FORMAT *xmm_area; long double ST[8]; /* These are for floating regs */ int cnt;
@@ -223,7 +197,7 @@ static void be_i386_all_print_context(HANDLE hThread, const dbg_ctx_t *pctx) dbg_printf(" ST%d:%Lf ", cnt, ST[cnt]); }
- xmm_area = (XMM_SAVE_AREA32 *) &ctx->ExtendedRegisters; + xmm_area = (XSAVE_FORMAT *) &ctx->ExtendedRegisters;
dbg_printf(" mxcsr: %04x (", xmm_area->MxCsr ); for (cnt = 0; cnt < 16; cnt++) @@ -350,14 +324,14 @@ static struct dbg_internal_var be_i386_ctx[] = {CV_REG_ST0+5, "ST5", (DWORD_PTR*)FIELD_OFFSET(WOW64_CONTEXT, FloatSave.RegisterArea[50]), dbg_itype_long_real}, {CV_REG_ST0+6, "ST6", (DWORD_PTR*)FIELD_OFFSET(WOW64_CONTEXT, FloatSave.RegisterArea[60]), dbg_itype_long_real}, {CV_REG_ST0+7, "ST7", (DWORD_PTR*)FIELD_OFFSET(WOW64_CONTEXT, FloatSave.RegisterArea[70]), dbg_itype_long_real}, - {CV_AMD64_XMM0, "XMM0", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[0])), dbg_itype_m128a}, - {CV_AMD64_XMM0+1, "XMM1", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[1])), dbg_itype_m128a}, - {CV_AMD64_XMM0+2, "XMM2", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[2])), dbg_itype_m128a}, - {CV_AMD64_XMM0+3, "XMM3", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[3])), dbg_itype_m128a}, - {CV_AMD64_XMM0+4, "XMM4", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[4])), dbg_itype_m128a}, - {CV_AMD64_XMM0+5, "XMM5", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[5])), dbg_itype_m128a}, - {CV_AMD64_XMM0+6, "XMM6", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[6])), dbg_itype_m128a}, - {CV_AMD64_XMM0+7, "XMM7", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[7])), dbg_itype_m128a}, + {CV_AMD64_XMM0, "XMM0", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[0])), dbg_itype_m128a}, + {CV_AMD64_XMM0+1, "XMM1", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + 
FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[1])), dbg_itype_m128a}, + {CV_AMD64_XMM0+2, "XMM2", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[2])), dbg_itype_m128a}, + {CV_AMD64_XMM0+3, "XMM3", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[3])), dbg_itype_m128a}, + {CV_AMD64_XMM0+4, "XMM4", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[4])), dbg_itype_m128a}, + {CV_AMD64_XMM0+5, "XMM5", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[5])), dbg_itype_m128a}, + {CV_AMD64_XMM0+6, "XMM6", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[6])), dbg_itype_m128a}, + {CV_AMD64_XMM0+7, "XMM7", (DWORD_PTR*)(FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[7])), dbg_itype_m128a}, {0, NULL, 0, dbg_itype_none} };
@@ -901,15 +875,15 @@ static struct gdb_register be_i386_gdb_register_map[] = { REG(NULL, "fooff", NULL, FloatSave.DataOffset), { NULL, "fop", NULL, FIELD_OFFSET(WOW64_CONTEXT, FloatSave.ErrorSelector)+2, 2},
- { "sse", "xmm0", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[0]), 16}, - { NULL, "xmm1", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[1]), 16}, - { NULL, "xmm2", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[2]), 16}, - { NULL, "xmm3", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[3]), 16}, - { NULL, "xmm4", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[4]), 16}, - { NULL, "xmm5", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[5]), 16}, - { NULL, "xmm6", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[6]), 16}, - { NULL, "xmm7", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, XmmRegisters[7]), 16}, - { NULL, "mxcsr", "i386_mxcsr", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XMM_SAVE_AREA32, MxCsr), 4}, + { "sse", "xmm0", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[0]), 16}, + { NULL, "xmm1", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[1]), 16}, + { NULL, "xmm2", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[2]), 16}, + { NULL, "xmm3", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[3]), 16}, + { NULL, "xmm4", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[4]), 16}, + { NULL, "xmm5", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[5]), 16}, + { NULL, "xmm6", "vec128", FIELD_OFFSET(WOW64_CONTEXT, 
ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[6]), 16}, + { NULL, "xmm7", "vec128", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters[7]), 16}, + { NULL, "mxcsr", "i386_mxcsr", FIELD_OFFSET(WOW64_CONTEXT, ExtendedRegisters) + FIELD_OFFSET(XSAVE_FORMAT, MxCsr), 4}, };
struct backend_cpu be_i386 =
 
            Signed-off-by: Paul Gofman pgofman@codeweavers.com --- include/winnt.h | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+)
diff --git a/include/winnt.h b/include/winnt.h index 38f7983cbf7..8c5f24aff86 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -1297,6 +1297,64 @@ NTSYSAPI PVOID WINAPI RtlVirtualUnwind(ULONG,ULONG64,ULONG64,RUNTIME_FUNCTION*,C
#endif /* __x86_64__ */
+#define XSTATE_LEGACY_FLOATING_POINT 0 +#define XSTATE_LEGACY_SSE 1 +#define XSTATE_GSSE 2 +#define XSTATE_AVX XSTATE_GSSE +#define XSTATE_MPX_BNDREGS 3 +#define XSTATE_MPX_BNDCSR 4 +#define XSTATE_AVX512_KMASK 5 +#define XSTATE_AVX512_ZMM_H 6 +#define XSTATE_AVX512_ZMM 7 +#define XSTATE_IPT 8 +#define XSTATE_CET_U 11 +#define XSTATE_LWP 62 + +typedef struct _YMMCONTEXT +{ + M128A Ymm0; + M128A Ymm1; + M128A Ymm2; + M128A Ymm3; + M128A Ymm4; + M128A Ymm5; + M128A Ymm6; + M128A Ymm7; + M128A Ymm8; + M128A Ymm9; + M128A Ymm10; + M128A Ymm11; + M128A Ymm12; + M128A Ymm13; + M128A Ymm14; + M128A Ymm15; +} +YMMCONTEXT, *PYMMCONTEXT; + +typedef struct _XSTATE +{ + ULONG64 Mask; + ULONG64 CompactionMask; + ULONG64 Reserved[6]; + YMMCONTEXT YmmContext; +} XSTATE, *PXSTATE; + +typedef struct _CONTEXT_CHUNK +{ + LONG Offset; + ULONG Length; +} CONTEXT_CHUNK, *PCONTEXT_CHUNK; + +typedef struct _CONTEXT_EX +{ + CONTEXT_CHUNK All; + CONTEXT_CHUNK Legacy; + CONTEXT_CHUNK XState; +#ifdef _WIN64 + ULONG64 align; +#endif +} CONTEXT_EX, *PCONTEXT_EX; + /* IA64 context definitions */ #ifdef __ia64__
 
            Signed-off-by: Paul Gofman pgofman@codeweavers.com --- include/msvcrt/intrin.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/include/msvcrt/intrin.h b/include/msvcrt/intrin.h index 5ad2776450a..38496251d5a 100644 --- a/include/msvcrt/intrin.h +++ b/include/msvcrt/intrin.h @@ -12,9 +12,13 @@ extern "C" { #endif
#if defined(__i386__) || defined(__x86_64__) +static inline void __cpuidex(int info[4], int ax, int cx) +{ + __asm__ ("cpuid" : "=a"(info[0]), "=b" (info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(ax), "c"(cx)); +} static inline void __cpuid(int info[4], int ax) { - __asm__ ("cpuid" : "=a"(info[0]), "=b" (info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(ax), "c"(0)); + return __cpuidex(info, ax, 0); } #endif
 
            Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/tests/virtual.c | 67 ++++++++++++++++++++++++++++++++++++ programs/wineboot/wineboot.c | 59 +++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+)
diff --git a/dlls/ntdll/tests/virtual.c b/dlls/ntdll/tests/virtual.c index beab744178a..4a0a4a6b8e5 100644 --- a/dlls/ntdll/tests/virtual.c +++ b/dlls/ntdll/tests/virtual.c @@ -516,9 +516,34 @@ static void test_NtMapViewOfSection(void) CloseHandle(process); }
+#define SUPPORTED_XSTATE_FEATURES ((1 << XSTATE_LEGACY_FLOATING_POINT) | (1 << XSTATE_LEGACY_SSE) | (1 << XSTATE_AVX)) + static void test_user_shared_data(void) { + struct old_xstate_configuration + { + ULONG64 EnabledFeatures; + ULONG Size; + ULONG OptimizedSave:1; + ULONG CompactionEnabled:1; + XSTATE_FEATURE Features[MAXIMUM_XSTATE_FEATURES]; + }; + + static const ULONG feature_offsets[] = + { + 0, + 160, /*offsetof(XMM_SAVE_AREA32, XmmRegisters)*/ + 512 /* sizeof(XMM_SAVE_AREA32) */ + offsetof(XSTATE, YmmContext), + }; + static const ULONG feature_sizes[] = + { + 160, + 256, /*sizeof(M128A) * 16 */ + sizeof(YMMCONTEXT), + }; const KSHARED_USER_DATA *user_shared_data = (void *)0x7ffe0000; + XSTATE_CONFIGURATION xstate = user_shared_data->XState; + unsigned int i;
ok(user_shared_data->NumberOfPhysicalPages == sbi.MmNumberOfPhysicalPages, "Got number of physical pages %#x, expected %#x.\n", @@ -534,6 +559,48 @@ static void test_user_shared_data(void) ok(user_shared_data->ActiveGroupCount == 1 || broken(!user_shared_data->ActiveGroupCount) /* before Win7 */, "Got unexpected ActiveGroupCount %u.\n", user_shared_data->ActiveGroupCount); + + if (!xstate.EnabledFeatures) + { + struct old_xstate_configuration *xs_old + = (struct old_xstate_configuration *)((char *)user_shared_data + 0x3e0); + + if (!xs_old->EnabledFeatures) + { + skip("XState features are not supported.\n"); + return; + } + + memset(&xstate, 0, sizeof(xstate)); + xstate.EnabledFeatures = xstate.EnabledVolatileFeatures = xs_old->EnabledFeatures; + memcpy(&xstate.Size, &xs_old->Size, sizeof(*xs_old) - offsetof(struct old_xstate_configuration, Size)); + for (i = 0; i < 3; ++i) + xstate.AllFeatures[i] = xs_old->Features[i].Size; + xstate.AllFeatureSize = 512 + sizeof(XSTATE); + } + + trace("XState EnabledFeatures %s.\n", wine_dbgstr_longlong(xstate.EnabledFeatures)); + ok((xstate.EnabledFeatures & SUPPORTED_XSTATE_FEATURES) == SUPPORTED_XSTATE_FEATURES, + "Got unexpected EnabledFeatures %s.\n", wine_dbgstr_longlong(xstate.EnabledFeatures)); + ok((xstate.EnabledVolatileFeatures & SUPPORTED_XSTATE_FEATURES) == xstate.EnabledFeatures, + "Got unexpected EnabledVolatileFeatures %s.\n", wine_dbgstr_longlong(xstate.EnabledVolatileFeatures)); + ok(xstate.Size >= 512 + sizeof(XSTATE), "Got unexpected Size %u.\n", xstate.Size); + if (xstate.CompactionEnabled) + ok(xstate.OptimizedSave, "Got zero OptimizedSave with compaction enabled.\n"); + ok(!xstate.AlignedFeatures, "Got unexpected AlignedFeatures %s.\n", + wine_dbgstr_longlong(xstate.AlignedFeatures)); + ok(xstate.AllFeatureSize >= 512 + sizeof(XSTATE), "Got unexpected AllFeatureSize %u.\n", + xstate.AllFeatureSize); + + for (i = 0; i < ARRAY_SIZE(feature_sizes); ++i) + { + ok(xstate.AllFeatures[i] == feature_sizes[i], "Got 
unexpected AllFeatures[%u] %u, expected %u.\n", i, + xstate.AllFeatures[i], feature_sizes[i]); + ok(xstate.Features[i].Size == feature_sizes[i], "Got unexpected Features[%u].Size %u, expected %u.\n", i, + xstate.Features[i].Size, feature_sizes[i]); + ok(xstate.Features[i].Offset == feature_offsets[i], "Got unexpected Features[%u].Offset %u, expected %u.\n", + i, xstate.Features[i].Offset, feature_offsets[i]); + } }
START_TEST(virtual) diff --git a/programs/wineboot/wineboot.c b/programs/wineboot/wineboot.c index 902f6af042e..2f8b7169cf6 100644 --- a/programs/wineboot/wineboot.c +++ b/programs/wineboot/wineboot.c @@ -191,6 +191,63 @@ static DWORD set_reg_value_dword( HKEY hkey, const WCHAR *name, DWORD value ) return RegSetValueExW( hkey, name, 0, REG_DWORD, (const BYTE *)&value, sizeof(value) ); }
+#if defined(__i386__) || defined(__x86_64__) + +static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data) +{ + XSTATE_CONFIGURATION *xstate = &data->XState; + unsigned int i; + int regs[4]; + + if (!data->ProcessorFeatures[PF_AVX_INSTRUCTIONS_AVAILABLE]) + return; + + __cpuidex(regs, 0, 0); + + TRACE("Max cpuid level %#x.\n", regs[0]); + if (regs[0] < 0xd) + return; + + __cpuidex(regs, 1, 0); + TRACE("CPU features %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]); + if (!(regs[2] & (0x1 << 27))) /* xsave OS enabled */ + return; + + __cpuidex(regs, 0xd, 0); + TRACE("XSAVE details %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]); + if (!(regs[0] & XSTATE_AVX)) + return; + + xstate->EnabledFeatures = (1 << XSTATE_LEGACY_FLOATING_POINT) | (1 << XSTATE_LEGACY_SSE) | (1 << XSTATE_AVX); + xstate->EnabledVolatileFeatures = xstate->EnabledFeatures; + xstate->Size = sizeof(XSAVE_FORMAT) + sizeof(XSTATE); + xstate->AllFeatureSize = regs[1]; + xstate->AllFeatures[0] = offsetof(XSAVE_FORMAT, XmmRegisters); + xstate->AllFeatures[1] = sizeof(M128A) * 16; + xstate->AllFeatures[2] = sizeof(YMMCONTEXT); + + for (i = 0; i < 3; ++i) + xstate->Features[i].Size = xstate->AllFeatures[i]; + + xstate->Features[1].Offset = xstate->Features[0].Size; + xstate->Features[2].Offset = sizeof(XSAVE_FORMAT) + offsetof(XSTATE, YmmContext); + + __cpuidex(regs, 0xd, 1); + xstate->OptimizedSave = regs[0] & 1; + xstate->CompactionEnabled = !!(regs[0] & 2); + + __cpuidex(regs, 0xd, 2); + TRACE("XSAVE feature 2 %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]); +} + +#else + +static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data) +{ +} + +#endif + static void create_user_shared_data(void) { struct _KUSER_SHARED_DATA *data; @@ -276,6 +333,8 @@ static void create_user_shared_data(void) data->ActiveProcessorCount = NtCurrentTeb()->Peb->NumberOfProcessors; data->ActiveGroupCount = 1;
+ initialize_xstate_features( data ); + UnmapViewOfFile( data ); }
 
            On Thu, 20 Aug 2020, Paul Gofman wrote: [...]
--- a/dlls/ntdll/tests/virtual.c +++ b/dlls/ntdll/tests/virtual.c @@ -516,9 +516,34 @@ static void test_NtMapViewOfSection(void)
[...]
- for (i = 0; i < ARRAY_SIZE(feature_sizes); ++i)
- {
ok(xstate.AllFeatures[i] == feature_sizes[i], "Got unexpected AllFeatures[%u] %u, expected %u.\n", i,
xstate.AllFeatures[i], feature_sizes[i]);
ok(xstate.Features[i].Size == feature_sizes[i], "Got unexpected Features[%u].Size %u, expected %u.\n", i,
xstate.Features[i].Size, feature_sizes[i]);
This test has failures on w7u and Windows 8.1: https://test.winehq.org/data/patterns.html#ntdll:virtual
* I suspect it mishandles 32-bit CPUs which would explain the failure on w7u. * For Windows 8.1 it may just be a broken behavior (which is masked on the TestBot because w8 and w864 skip this test). * Finally there was a broken() case that was introduced for win10pro but in fact it is necessary for all Windows 10 versions. So at a minimum the comment should be changed. But I'm not sure it makes sense to consider the behavior broken.
I've created a bug report to keep track of all that: https://bugs.winehq.org/show_bug.cgi?id=51188
Could you have a look?
 
            FWIW the tests succeed on real hardware for me (64-bit, xstate supported) on Win7 and Win10 (latest; the broken case is not hit). I suspect that falling into the broken case has something to do with VM specifics, so I am inclined to leave it marked as broken, at least for now, but change the comment.
Regarding the failure on 32-bit CPUs, I think I should just drop the part of the test which converts the old structure layout; this will skip the tests on Win7/8 regardless of CPU type. I guess testing the old Windows internal structure layout is not very interesting, especially for 32-bit CPUs.
On 5/26/21 14:13, Francois Gouget wrote:
On Thu, 20 Aug 2020, Paul Gofman wrote: [...]
--- a/dlls/ntdll/tests/virtual.c +++ b/dlls/ntdll/tests/virtual.c @@ -516,9 +516,34 @@ static void test_NtMapViewOfSection(void)
[...]
- for (i = 0; i < ARRAY_SIZE(feature_sizes); ++i)
- {
ok(xstate.AllFeatures[i] == feature_sizes[i], "Got unexpected AllFeatures[%u] %u, expected %u.\n", i,
xstate.AllFeatures[i], feature_sizes[i]);
ok(xstate.Features[i].Size == feature_sizes[i], "Got unexpected Features[%u].Size %u, expected %u.\n", i,
xstate.Features[i].Size, feature_sizes[i]);
This test has failures on w7u and Windows 8.1: https://test.winehq.org/data/patterns.html#ntdll:virtual
- I suspect it mishandles 32-bit CPUs which would explain the failure on w7u.
- For Windows 8.1 it may just be a broken behavior (which is masked on the TestBot because w8 and w864 skip this test).
- Finally there was a broken() case that was introduced for win10pro but in fact it is necessary for all Windows 10 versions. So at a minimum the comment should be changed. But I'm not sure it makes sense to consider the behavior broken.
I've created a bug report to keep track of all that: https://bugs.winehq.org/show_bug.cgi?id=51188
Could you have a look?
 
            On Wed, 26 May 2021, Paul Gofman wrote:
FWIW the tests succeed on real hardware for me (64 bit, xstate supported) on Win7 and Win10 (latest; the broken case is not hit). I suspect the fall into broken case has something to do with VM specifics,
Not so for Windows 10 since it falls into the broken case on cw-rx460 (Windows 10 1507) which is not a VM (i7-2600K + AMD RX460 graphics card):
virtual.c:1128: system page size 0x1000 virtual.c:224: Tests skipped: NtAllocateVirtualMemoryEx() is missing virtual.c:939: XState EnabledFeatures 7. virtual.c:962: Test failed: Got unexpected AllFeatures[0] 0, expected 160. virtual.c:962: Test failed: Got unexpected AllFeatures[1] 0, expected 256. virtual.c:962: Test failed: Got unexpected AllFeatures[2] 0, expected 256. virtual.c:970: Size=832 AllFeatureSize=832 virtual.c:1075: Tests skipped: RtlFindExportedRoutineByName not supported virtual.c:1092: Tests skipped: syscall thunk relocated 0f78:virtual:0.234 1040 tests executed (0 marked as todo, 3 failures), 3 skipped.
 
            On 5/26/21 15:17, Francois Gouget wrote:
On Wed, 26 May 2021, Paul Gofman wrote:
FWIW the tests succeed on real hardware for me (64 bit, xstate supported) on Win7 and Win10 (latest; the broken case is not hit). I suspect the fall into broken case has something to do with VM specifics,
Not so for Windows 10 since it falls into the broken case on cw-rx460 (Windows 10 1507) which is not a VM (i7-2600K + AMD RX460 graphics card):
So if it is different for different real hw Win10 machines, but the reason is not known, can't that result just stay broken (whatever the reason for that is, would it be 1507 version, CPU model or some setup quirk)? I have all the default system with all the updates auto installed here, so unlikely something in my setup makes it work in an unusual way.
 
            On Wed, 26 May 2021, Paul Gofman wrote:
On 5/26/21 15:17, Francois Gouget wrote:
On Wed, 26 May 2021, Paul Gofman wrote:
FWIW the tests succeed on real hardware for me (64 bit, xstate supported) on Win7 and Win10 (latest; the broken case is not hit). I suspect the fall into broken case has something to do with VM specifics,
Not so for Windows 10 since it falls into the broken case on cw-rx460 (Windows 10 1507) which is not a VM (i7-2600K + AMD RX460 graphics card):
So if it is different for different real hw Win10 machines, but the reason is not known, can't that result just stay broken
It seems wrong to me to claim a Windows behavior is broken if: 1. It is the behavior of all recent Windows versions. 2. We have no idea why some machines behave differently. 3. We have no idea how many machines exhibit the 'broken' behavior. (If 90% of the Windows machines 'in the wild' exhibit the broken behavior then this is the behavior that will be expected)
Options: 1. Drop the test If we don't know of Windows applications that depend on this aspect and don't expect them to, then maybe the test is not needed. 2. Investigate more to actually understand what is going on. Then we can decide what to do. 3. Leave it as is The broken is not causing failures so it's ok. Change the comment to a FIXME explaining we don't know what's going on. That also means we don't know how to implement this in Wine.
(whatever the reason for that is, would it be 1507 version, CPU model or some setup quirk)?
I am not going to test all the Windows versions on cw-gtx560 and cw-rx460 because each time it requires manually reimaging the partition, manually stopping WineTest before it starts, manually transferring the test executable and manually running it.
But it's not just Windows 10 1507, the test fails in the same way on:
cw-gtx560 Win10 1909 i7-2600K broken (success) cw-rx460 Win10 2009 i7-2600K broken (success)
That's consistent with the VMs behavior so there is no reason to expect other Windows versions would behave differently.
I have all the default system with all the updates auto installed here, so unlikely something in my setup makes it work in an unusual way.
I can say the same for cw-rx460 and cw-gtx560: there may be some setting that makes a difference but at least they are all tracked on the page below so there's not a lot.
https://wiki.winehq.org/Wine_TestBot_VMs#Windows_configuration
 
            On 5/26/21 17:23, Francois Gouget wrote:
On Wed, 26 May 2021, Paul Gofman wrote:
It seems wrong to me to claim a Windows behavior is broken if:
- It is the behavior of all recent Windows versions.
Not that I see here. And the result doesn't make sense by itself, not sure why the size for all features should be zero. Sometimes we mark test output possible on Windows as broken because it looks weird compared to other possible results.
Options:
- Drop the test If we don't know of Windows applications that depend on this aspect and don't expect them to, then maybe the test is not needed.
At this point I think it is the best way. I'd personally prefer it to stay in the current state at least for now, but if that causes concerns I think this test isn't worth it; I highly doubt anything depends on it in the wild given it is not consistent.
- Investigate more to actually understand what is going on. Then we can decide what to do.
I can't do that without direct access to a machine reproducing this behaviour. If I had one maybe I would be curious to find out.
 
            On Wed, 26 May 2021, Paul Gofman wrote:
FWIW the tests succeed on real hardware for me (64 bit, xstate supported) on Win7 and Win10 (latest; the broken case is not hit).
So it would be interesting to tease out the differences between your machines and the ones I have access to.
What is your CPU? Maybe Windows 10 only sets these fields for recent enough CPUs?
You said you have all the latest updates. The most recent Windows 10 version I have is 2009 with all the updates up to 2021-05-03. Do you have 21H1 already?
Can you think of other aspects that could make a difference?
 
            On 5/27/21 03:00, Francois Gouget wrote:
On Wed, 26 May 2021, Paul Gofman wrote:
FWIW the tests succeed on real hardware for me (64 bit, xstate supported) on Win7 and Win10 (latest; the broken case is not hit).
So it would be interesting to tease out the differences between your machines and the ones I have access to.
What is your CPU? Maybe Windows 10 only sets these fields for recent enough CPUs?
You said you have all the latest updates. The most recent Windows 10 version I have is 2009 with all the updates up to 2021-05-03. Do you have 21H1 already?
Can you think of other aspects that could make a difference?
I have AMD Ryzen 5 3500X, Windows 10 Pro, 20H2 Build 19042.985. Everything was the same with these test results at the moment of writing the original patch. Maybe this depends on CPU feature flags, or AVX2. As far as I looked up, i7-2600K is from early 2010s and doesn't support AVX2, maybe it doesn't support some other xstate related features like xsavec or xsaveopt (I don't have a full cpuid for that CPU to be sure). I am not sure that the CPU dependence of this internal field's behaviour is worth supporting, and thus not sure it is worth spending a lot of time investigating. If this is a matter of justifying the 'broken' statement, I am open to any variant which is best for the (probably not unique) case when we don't know for sure why it differs, like removing the check for this field, marking both results as non-broken or changing the comment.
 
            On Thu, 27 May 2021, Paul Gofman wrote: [...]
I have AMD Ryzen 5 3500X, Windows 10 Pro, 20H2 Build 19042.985. Everything was the same with these test results at the moment of writing the original patch. Maybe this depends on CPU feature flags, or AVX2. As far as I looked up, i7-2600K is from early 2010s and doesn't support AVX2, maybe it doesn't support some other xstate related features like xsavec or xsaveopt (I don't have a full cpuid for that CPU to be sure).
I think it has to do with the xsavec support. Running the (Linux) cpuid tool on the cw-* machines gives:
i7-2600K: XSAVE features (0xd/1): XSAVEOPT instruction = true XSAVEC instruction = false XGETBV instruction = false XSAVES/XRSTORS instructions = false SAVE area size in bytes = 0x00000000 (0) IA32_XSS lower 32 bits valid bit field mask = 0x00000000 IA32_XSS upper 32 bits valid bit field mask = 0x00000000
(same thing in the TestBot VMs and on my i7-4790K)
And the test does not get the broken case if I run it in a Windows 10 VM on my laptop where cpuid says:
i7-8565U: XSAVE features (0xd/1): XSAVEOPT instruction = true XSAVEC instruction = true XGETBV instruction = true XSAVES/XRSTORS instructions = true SAVE area size in bytes = 0x000003c0 (960) IA32_XSS lower 32 bits valid bit field mask = 0x00000100 IA32_XSS upper 32 bits valid bit field mask = 0x00000000
I think that's also the reason for the failure on Windows 8.1. So I'll send a patch.
 
            Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/kernel32/process.c | 6 ++++-- dlls/ntdll/exception.c | 10 +++++++++ dlls/ntdll/ntdll.spec | 1 + dlls/ntdll/tests/virtual.c | 33 +++++++++++++++++++++++------ dlls/ntoskrnl.exe/ntoskrnl.exe.spec | 1 + include/ddk/wdm.h | 2 ++ include/winbase.h | 1 + 7 files changed, 45 insertions(+), 9 deletions(-)
diff --git a/dlls/kernel32/process.c b/dlls/kernel32/process.c index b2bd5980fd9..4f27fc3298b 100644 --- a/dlls/kernel32/process.c +++ b/dlls/kernel32/process.c @@ -741,13 +741,15 @@ DWORD WINAPI GetMaximumProcessorCount(WORD group) return cpus; }
+ULONG64 WINAPI RtlGetEnabledExtendedFeatures(ULONG64 feature_mask); + /*********************************************************************** * GetEnabledXStateFeatures (KERNEL32.@) */ DWORD64 WINAPI GetEnabledXStateFeatures(void) { - FIXME("\n"); - return 0; + TRACE(".\n"); + return RtlGetEnabledExtendedFeatures(~(ULONG64)0); }
/*********************************************************************** diff --git a/dlls/ntdll/exception.c b/dlls/ntdll/exception.c index f8aca6dfb7f..3f7443bb45b 100644 --- a/dlls/ntdll/exception.c +++ b/dlls/ntdll/exception.c @@ -30,6 +30,7 @@ #define WIN32_NO_STATUS #include "windef.h" #include "winternl.h" +#include "ddk/wdm.h" #include "wine/exception.h" #include "wine/server.h" #include "wine/list.h" @@ -655,3 +656,12 @@ BOOL WINAPI IsBadStringPtrW( LPCWSTR str, UINT_PTR max ) __ENDTRY return FALSE; } + + +/********************************************************************** + * RtlGetEnabledExtendedFeatures (NTDLL.@) + */ +ULONG64 WINAPI RtlGetEnabledExtendedFeatures(ULONG64 feature_mask) +{ + return user_shared_data->XState.EnabledFeatures & feature_mask; +} diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec index 2a95dddf202..9981c57bd7d 100644 --- a/dlls/ntdll/ntdll.spec +++ b/dlls/ntdll/ntdll.spec @@ -693,6 +693,7 @@ @ stdcall RtlGetDaclSecurityDescriptor(ptr ptr ptr ptr) @ stub RtlGetElementGenericTable # @ stub RtlGetElementGenericTableAvl +@ stdcall RtlGetEnabledExtendedFeatures(int64) @ stdcall RtlGetExePath(wstr ptr) # @ stub RtlGetFirstRange @ stdcall RtlGetFrame() diff --git a/dlls/ntdll/tests/virtual.c b/dlls/ntdll/tests/virtual.c index 4a0a4a6b8e5..47eb784cbbc 100644 --- a/dlls/ntdll/tests/virtual.c +++ b/dlls/ntdll/tests/virtual.c @@ -29,7 +29,9 @@
static unsigned int page_size;
+static DWORD64 (WINAPI *pGetEnabledXStateFeatures)(void); static NTSTATUS (WINAPI *pRtlCreateUserStack)(SIZE_T, SIZE_T, ULONG, SIZE_T, SIZE_T, INITIAL_TEB *); +static ULONG64 (WINAPI *pRtlGetEnabledExtendedFeatures)(ULONG64); static NTSTATUS (WINAPI *pRtlFreeUserStack)(void *); static BOOL (WINAPI *pIsWow64Process)(HANDLE, PBOOL); static const BOOL is_win64 = sizeof(void*) != sizeof(int); @@ -543,6 +545,7 @@ static void test_user_shared_data(void) }; const KSHARED_USER_DATA *user_shared_data = (void *)0x7ffe0000; XSTATE_CONFIGURATION xstate = user_shared_data->XState; + ULONG64 feature_mask; unsigned int i;
ok(user_shared_data->NumberOfPhysicalPages == sbi.MmNumberOfPhysicalPages, @@ -560,17 +563,24 @@ static void test_user_shared_data(void) || broken(!user_shared_data->ActiveGroupCount) /* before Win7 */, "Got unexpected ActiveGroupCount %u.\n", user_shared_data->ActiveGroupCount);
+ if (!pRtlGetEnabledExtendedFeatures) + { + skip("RtlGetEnabledExtendedFeatures is not available.\n"); + return; + } + + feature_mask = pRtlGetEnabledExtendedFeatures(~(ULONG64)0); + if (!feature_mask) + { + skip("XState features are not available.\n"); + return; + } + if (!xstate.EnabledFeatures) { struct old_xstate_configuration *xs_old = (struct old_xstate_configuration *)((char *)user_shared_data + 0x3e0);
- if (!xs_old->EnabledFeatures) - { - skip("XState features are not supported.\n"); - return; - } - memset(&xstate, 0, sizeof(xstate)); xstate.EnabledFeatures = xstate.EnabledVolatileFeatures = xs_old->EnabledFeatures; memcpy(&xstate.Size, &xs_old->Size, sizeof(*xs_old) - offsetof(struct old_xstate_configuration, Size)); @@ -580,6 +590,14 @@ static void test_user_shared_data(void) }
trace("XState EnabledFeatures %s.\n", wine_dbgstr_longlong(xstate.EnabledFeatures)); + feature_mask = pRtlGetEnabledExtendedFeatures(0); + ok(!feature_mask, "Got unexpected feature_mask %s.\n", wine_dbgstr_longlong(feature_mask)); + feature_mask = pRtlGetEnabledExtendedFeatures(~(ULONG64)0); + ok(feature_mask == xstate.EnabledFeatures, "Got unexpected feature_mask %s.\n", + wine_dbgstr_longlong(feature_mask)); + feature_mask = pGetEnabledXStateFeatures(); + ok(feature_mask == xstate.EnabledFeatures, "Got unexpected feature_mask %s.\n", + wine_dbgstr_longlong(feature_mask)); ok((xstate.EnabledFeatures & SUPPORTED_XSTATE_FEATURES) == SUPPORTED_XSTATE_FEATURES, "Got unexpected EnabledFeatures %s.\n", wine_dbgstr_longlong(xstate.EnabledFeatures)); ok((xstate.EnabledVolatileFeatures & SUPPORTED_XSTATE_FEATURES) == xstate.EnabledFeatures, @@ -623,10 +641,11 @@ START_TEST(virtual)
mod = GetModuleHandleA("kernel32.dll"); pIsWow64Process = (void *)GetProcAddress(mod, "IsWow64Process"); - + pGetEnabledXStateFeatures = (void *)GetProcAddress(mod, "GetEnabledXStateFeatures"); mod = GetModuleHandleA("ntdll.dll"); pRtlCreateUserStack = (void *)GetProcAddress(mod, "RtlCreateUserStack"); pRtlFreeUserStack = (void *)GetProcAddress(mod, "RtlFreeUserStack"); + pRtlGetEnabledExtendedFeatures = (void *)GetProcAddress(mod, "RtlGetEnabledExtendedFeatures");
NtQuerySystemInformation(SystemBasicInformation, &sbi, sizeof(sbi), NULL); trace("system page size %#x\n", sbi.PageSize); diff --git a/dlls/ntoskrnl.exe/ntoskrnl.exe.spec b/dlls/ntoskrnl.exe/ntoskrnl.exe.spec index 4d39f8eea2f..0e8082fad26 100644 --- a/dlls/ntoskrnl.exe/ntoskrnl.exe.spec +++ b/dlls/ntoskrnl.exe/ntoskrnl.exe.spec @@ -1105,6 +1105,7 @@ @ stdcall RtlGetProductInfo(long long long long ptr) @ stdcall RtlGetSaclSecurityDescriptor(ptr ptr ptr ptr) @ stub RtlGetSetBootStatusData +@ stdcall RtlGetEnabledExtendedFeatures(int64) @ stdcall RtlGetVersion(ptr) @ stdcall RtlHashUnicodeString(ptr long long ptr) @ stdcall RtlIdnToAscii(long wstr long ptr ptr) diff --git a/include/ddk/wdm.h b/include/ddk/wdm.h index b8bb7bd971f..23bee1ba9ee 100644 --- a/include/ddk/wdm.h +++ b/include/ddk/wdm.h @@ -1843,6 +1843,8 @@ void WINAPI RtlCopyMemoryNonTemporal(void*,const void*,SIZE_T); #endif BOOLEAN WINAPI RtlIsNtDdiVersionAvailable(ULONG);
+ULONG64 WINAPI RtlGetEnabledExtendedFeatures(ULONG64 feature_mask); + NTSTATUS WINAPI ZwAddBootEntry(PUNICODE_STRING,PUNICODE_STRING); NTSTATUS WINAPI ZwAccessCheckAndAuditAlarm(PUNICODE_STRING,HANDLE,PUNICODE_STRING,PUNICODE_STRING,PSECURITY_DESCRIPTOR,ACCESS_MASK,PGENERIC_MAPPING,BOOLEAN,PACCESS_MASK,PBOOLEAN,PBOOLEAN); NTSTATUS WINAPI ZwAdjustPrivilegesToken(HANDLE,BOOLEAN,PTOKEN_PRIVILEGES,DWORD,PTOKEN_PRIVILEGES,PDWORD); diff --git a/include/winbase.h b/include/winbase.h index 319423b572f..f2177ada1e4 100644 --- a/include/winbase.h +++ b/include/winbase.h @@ -2136,6 +2136,7 @@ WINBASEAPI DWORD WINAPI GetDllDirectoryW(DWORD,LPWSTR); WINBASEAPI UINT WINAPI GetDriveTypeA(LPCSTR); WINBASEAPI UINT WINAPI GetDriveTypeW(LPCWSTR); #define GetDriveType WINELIB_NAME_AW(GetDriveType) +WINBASEAPI DWORD64 WINAPI GetEnabledXStateFeatures(void); WINBASEAPI LPSTR WINAPI GetEnvironmentStringsA(void); WINBASEAPI LPWSTR WINAPI GetEnvironmentStringsW(void); #define GetEnvironmentStrings WINELIB_NAME_AW(GetEnvironmentStrings)
 
            Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=77351
Your paranoid android.
=== debiant (32 bit report) ===
kernel32: loader.c:3932: Test failed: ntdll.dll:0: wrong OptionalHeader.AddressOfEntryPoint 5f380 / 5f340 loader.c:3946: Test failed: ntdll.dll:0: wrong OptionalHeader.DataDirectory[i].Size 13416 / 133c6 loader.c:3952: Test failed: ntdll.dll: wrong section 0 loader.c:3952: Test failed: ntdll.dll: wrong section 2 loader.c:3952: Test failed: ntdll.dll: wrong section 5 loader.c:3952: Test failed: ntdll.dll: wrong section 10 loader.c:3952: Test failed: ntdll.dll: wrong section 11 loader.c:3952: Test failed: ntdll.dll: wrong section 12 loader.c:3952: Test failed: ntdll.dll: wrong section 13 loader.c:3952: Test failed: ntdll.dll: wrong section 14 loader.c:3952: Test failed: ntdll.dll: wrong section 15
=== debiant (32 bit Chinese:China report) ===
kernel32: change.c:350: Test failed: should be ready loader.c:3932: Test failed: ntdll.dll:0: wrong OptionalHeader.AddressOfEntryPoint 5f380 / 5f340 loader.c:3946: Test failed: ntdll.dll:0: wrong OptionalHeader.DataDirectory[i].Size 13416 / 133c6 loader.c:3952: Test failed: ntdll.dll: wrong section 0 loader.c:3952: Test failed: ntdll.dll: wrong section 2 loader.c:3952: Test failed: ntdll.dll: wrong section 5 loader.c:3952: Test failed: ntdll.dll: wrong section 10 loader.c:3952: Test failed: ntdll.dll: wrong section 11 loader.c:3952: Test failed: ntdll.dll: wrong section 12 loader.c:3952: Test failed: ntdll.dll: wrong section 13 loader.c:3952: Test failed: ntdll.dll: wrong section 14 loader.c:3952: Test failed: ntdll.dll: wrong section 15
=== debiant (32 bit WoW report) ===
kernel32: loader.c:3932: Test failed: ntdll.dll:0: wrong OptionalHeader.AddressOfEntryPoint 5f380 / 5f340 loader.c:3946: Test failed: ntdll.dll:0: wrong OptionalHeader.DataDirectory[i].Size 13416 / 133c6 loader.c:3952: Test failed: ntdll.dll: wrong section 0 loader.c:3952: Test failed: ntdll.dll: wrong section 2 loader.c:3952: Test failed: ntdll.dll: wrong section 5 loader.c:3952: Test failed: ntdll.dll: wrong section 10 loader.c:3952: Test failed: ntdll.dll: wrong section 11 loader.c:3952: Test failed: ntdll.dll: wrong section 12 loader.c:3952: Test failed: ntdll.dll: wrong section 13 loader.c:3952: Test failed: ntdll.dll: wrong section 14 loader.c:3952: Test failed: ntdll.dll: wrong section 15
=== debiant (64 bit WoW report) ===
kernel32: loader.c:3932: Test failed: ntdll.dll:0: wrong OptionalHeader.AddressOfEntryPoint 5f380 / 5f340 loader.c:3946: Test failed: ntdll.dll:0: wrong OptionalHeader.DataDirectory[i].Size 13416 / 133c6 loader.c:3952: Test failed: ntdll.dll: wrong section 0 loader.c:3952: Test failed: ntdll.dll: wrong section 2 loader.c:3952: Test failed: ntdll.dll: wrong section 5 loader.c:3952: Test failed: ntdll.dll: wrong section 10 loader.c:3952: Test failed: ntdll.dll: wrong section 11 loader.c:3952: Test failed: ntdll.dll: wrong section 12 loader.c:3952: Test failed: ntdll.dll: wrong section 13 loader.c:3952: Test failed: ntdll.dll: wrong section 14 loader.c:3952: Test failed: ntdll.dll: wrong section 15


