This one is somewhat tricky, as the existing `ProcessorFeatures` array is limited to `PROCESSOR_FEATURE_MAX` (i.e. 64) entries, while this patch series now defines features up to index 88.
To handle the higher number of features, this patch extends `KUSER_SHARED_DATA` with a second array, `ProcessorFeaturesExtra`, for the features at index 64 and above. This requires updating the `C_ASSERT` on the size of the structure (from 0x738 to 0x758).
If there's a better place to store the excess feature bits, I'm open to suggestions. As far as I can see, there's no obvious ABI-visible place where they should be stored. (I also see that the upstream definition of `KUSER_SHARED_DATA` has been extended with another new field that Wine doesn't have, so having our own data there probably isn't ideal.)
If there's some other convenient way of storing this (e.g. a global variable in ntdll), that would work too; however, it would need to be accessible from both the PE and Unix sides, so it's probably not very straightforward either.
From: Martin Storsjö martin@martin.st
These were defined in newer versions of WinSDK 10.0.26100. --- include/winnt.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/include/winnt.h b/include/winnt.h index 423242ddcc4..ae5f54de022 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -1114,6 +1114,34 @@ typedef enum _HEAP_INFORMATION_CLASS { #define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58 #define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59 #define PF_BMI2_INSTRUCTIONS_AVAILABLE 60 +#define PF_MOVDIR64B_INSTRUCTION_AVAILABLE 61 +#define PF_ARM_LSE2_AVAILABLE 62 +#define PF_RESERVED_FEATURE 63 +#define PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE 64 +#define PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE 65 +#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66 +#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67 +#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68 +#define PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE 69 +#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70 +#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71 +#define PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE 72 +#define PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE 73 +#define PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE 74 +#define PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE 75 +#define PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE 76 +#define PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE 77 +#define PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE 78 +#define PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE 79 +#define PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE 80 +#define PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE 81 +#define PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE 82 +#define PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE 83 +#define PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE 84 +#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85 +#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86 +#define PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE 87 +#define PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE 88
/* Execution state flags */
From: Martin Storsjö martin@martin.st
This requires extending the current ProcessorFeatures array somehow, as the primary one in KUSER_SHARED_DATA is limited to 64 entries. --- dlls/ntdll/signal_arm64.c | 6 +++++- dlls/ntdll/unix/system.c | 33 +++++++++++++++++++++++++++++++++ include/ddk/wdm.h | 5 ++++- 3 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/signal_arm64.c b/dlls/ntdll/signal_arm64.c index 9096f331dbc..ff568658b16 100644 --- a/dlls/ntdll/signal_arm64.c +++ b/dlls/ntdll/signal_arm64.c @@ -650,7 +650,11 @@ BOOLEAN WINAPI RtlIsProcessorFeaturePresent( UINT feature ) (1ull << PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE) | (1ull << PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE) | (1ull << PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE) | - (1ull << PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE); + (1ull << PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE) | + (1ull << PF_ARM_LSE2_AVAILABLE); + + if (feature >= PROCESSOR_FEATURE_MAX && feature < PROCESSOR_FEATURE_MAX_EXTRA) + return user_shared_data->ProcessorFeaturesExtra[feature - PROCESSOR_FEATURE_MAX];
return (feature < PROCESSOR_FEATURE_MAX && (arm64_features & (1ull << feature)) && user_shared_data->ProcessorFeatures[feature]); diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index 40b355174b9..a40af485b00 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -604,6 +604,7 @@ static ULONGLONG get_cpu_features(void) void init_shared_data_cpuinfo( KUSER_SHARED_DATA *data ) { BOOLEAN *features = data->ProcessorFeatures; + BOOLEAN *features_extra = data->ProcessorFeaturesExtra;
#ifdef linux FILE *f = fopen("/proc/cpuinfo", "r"); @@ -645,6 +646,38 @@ void init_shared_data_cpuinfo( KUSER_SHARED_DATA *data ) features[PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE] = has_feature( value, "svei8mm" ); features[PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE] = has_feature( value, "svef32mm" ); features[PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE] = has_feature( value, "svef64mm" ); + features[PF_ARM_LSE2_AVAILABLE] = has_feature( value, "uscat" ); + +#define SET_FEATURE_EXTRA(id, string) \ + features_extra[id - PROCESSOR_FEATURE_MAX] = has_feature( value, string ) + + SET_FEATURE_EXTRA( PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE, "sha3" ); + SET_FEATURE_EXTRA( PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE, "sha512" ); + SET_FEATURE_EXTRA( PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE, "i8mm" ); + SET_FEATURE_EXTRA( PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE, "fphp" ); + SET_FEATURE_EXTRA( PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE, "bf16" ); + SET_FEATURE_EXTRA( PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE, "ebf16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_INSTRUCTIONS_AVAILABLE, "sme" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_INSTRUCTIONS_AVAILABLE, "sme2" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE, "sme2p1" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE, "sme2p2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE, "smeaes" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE, "smesbitperm" ); + /* The PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE and + * PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE flags aren't exposed by + * the Linux kernel, see + * https://lists.infradead.org/pipermail/linux-arm-kernel/2025-January/991187.h... 
*/ + SET_FEATURE_EXTRA( PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE, "smesf8dp2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE, "smesf8dp4" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE, "smesf8fma" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE, "smef8f32" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE, "smef8f16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE, "smef16f16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE, "smeb16b16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE, "smef64f64" ); + SET_FEATURE_EXTRA( PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE, "smei16i64" ); + SET_FEATURE_EXTRA( PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE, "smelutv2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE, "smefa64" ); break; } fclose( f ); diff --git a/include/ddk/wdm.h b/include/ddk/wdm.h index 9bbba22d2fe..6a4a43d400b 100644 --- a/include/ddk/wdm.h +++ b/include/ddk/wdm.h @@ -1231,6 +1231,8 @@ typedef enum _NT_PRODUCT_TYPE {
#define PROCESSOR_FEATURE_MAX 64
+#define PROCESSOR_FEATURE_MAX_EXTRA 96 + typedef enum _ALTERNATIVE_ARCHITECTURE_TYPE { StandardDesign, @@ -1354,9 +1356,10 @@ typedef struct _KUSER_SHARED_DATA { KSYSTEM_TIME FeatureConfigurationChangeStamp; /* 0x720 */ ULONG Spare; ULONG64 UserPointerAuthMask; /* 0x730 */ + BOOLEAN ProcessorFeaturesExtra[PROCESSOR_FEATURE_MAX_EXTRA - PROCESSOR_FEATURE_MAX];/* 0x738 */ } KUSER_SHARED_DATA, *PKUSER_SHARED_DATA;
-C_ASSERT( sizeof(KUSER_SHARED_DATA) == 0x738 ); +C_ASSERT( sizeof(KUSER_SHARED_DATA) == 0x758 );
#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_ENABLED 0x01 #define SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_HV_PAGE 0x02