This one is somewhat tricky, as the existing `ProcessorFeatures` array is limited to `PROCESSOR_FEATURE_MAX` (i.e. 64) entries, while this patch now defines features up to index 88.
To handle the higher number of features, this patch extends `KUSER_SHARED_DATA` with a separate second array for the later features. This requires updating an assert about its size.
If there's a better place to store the excess feature bits, I'm open to suggestions. As far as I can see, there's no obvious ABI-visible place where they should be stored. (I also see that the upstream definition of `KUSER_SHARED_DATA` has been extended with another new field that Wine doesn't have, so having our own data there probably isn't ideal.)
If there's some other convenient way of storing this (e.g. a global variable in ntdll), that would work too, but it would need to be accessible from both the PE and Unix sides, so it's probably not very straightforward either.
-- v2: arm64: Detect more new processor features.
From: Martin Storsjö martin@martin.st
This requires extending the current ProcessorFeatures array somehow, as the primary one in KUSER_SHARED_DATA is limited to 64 entries. --- dlls/ntdll/signal_arm64.c | 3 +++ dlls/ntdll/unix/system.c | 32 ++++++++++++++++++++++++++++++++ include/ddk/wdm.h | 5 ++++- 3 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/dlls/ntdll/signal_arm64.c b/dlls/ntdll/signal_arm64.c index 31469dfea71..ff568658b16 100644 --- a/dlls/ntdll/signal_arm64.c +++ b/dlls/ntdll/signal_arm64.c @@ -653,6 +653,9 @@ BOOLEAN WINAPI RtlIsProcessorFeaturePresent( UINT feature ) (1ull << PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE) | (1ull << PF_ARM_LSE2_AVAILABLE);
+ if (feature >= PROCESSOR_FEATURE_MAX && feature < PROCESSOR_FEATURE_MAX_EXTRA) + return user_shared_data->ProcessorFeaturesExtra[feature - PROCESSOR_FEATURE_MAX]; + return (feature < PROCESSOR_FEATURE_MAX && (arm64_features & (1ull << feature)) && user_shared_data->ProcessorFeatures[feature]); } diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index e8ced3b213d..3399de9f0a6 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -605,6 +605,7 @@ static ULONGLONG get_cpu_features(void) void init_shared_data_cpuinfo( KUSER_SHARED_DATA *data ) { BOOLEAN *features = data->ProcessorFeatures; + BOOLEAN *features_extra = data->ProcessorFeaturesExtra;
#ifdef linux FILE *f = fopen("/proc/cpuinfo", "r"); @@ -647,6 +648,37 @@ void init_shared_data_cpuinfo( KUSER_SHARED_DATA *data ) features[PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE] = has_feature( value, "svef32mm" ); features[PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE] = has_feature( value, "svef64mm" ); features[PF_ARM_LSE2_AVAILABLE] = has_feature( value, "uscat" ); + +#define SET_FEATURE_EXTRA(id, string) \ + features_extra[id - PROCESSOR_FEATURE_MAX] = has_feature( value, string ) + + SET_FEATURE_EXTRA( PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE, "sha3" ); + SET_FEATURE_EXTRA( PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE, "sha512" ); + SET_FEATURE_EXTRA( PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE, "i8mm" ); + SET_FEATURE_EXTRA( PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE, "fphp" ); + SET_FEATURE_EXTRA( PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE, "bf16" ); + SET_FEATURE_EXTRA( PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE, "ebf16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_INSTRUCTIONS_AVAILABLE, "sme" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_INSTRUCTIONS_AVAILABLE, "sme2" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE, "sme2p1" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE, "sme2p2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE, "smeaes" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE, "smesbitperm" ); + /* The PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE and + * PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE flags aren't exposed by + * the Linux kernel, see + * https://lists.infradead.org/pipermail/linux-arm-kernel/2025-January/991187.h... 
*/ + SET_FEATURE_EXTRA( PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE, "smesf8dp2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE, "smesf8dp4" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE, "smesf8fma" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE, "smef8f32" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE, "smef8f16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE, "smef16f16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE, "smeb16b16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE, "smef64f64" ); + SET_FEATURE_EXTRA( PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE, "smei16i64" ); + SET_FEATURE_EXTRA( PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE, "smelutv2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE, "smefa64" ); break; } fclose( f ); diff --git a/include/ddk/wdm.h b/include/ddk/wdm.h index 9bbba22d2fe..6a4a43d400b 100644 --- a/include/ddk/wdm.h +++ b/include/ddk/wdm.h @@ -1231,6 +1231,8 @@ typedef enum _NT_PRODUCT_TYPE {
#define PROCESSOR_FEATURE_MAX 64
+#define PROCESSOR_FEATURE_MAX_EXTRA 96 + typedef enum _ALTERNATIVE_ARCHITECTURE_TYPE { StandardDesign, @@ -1354,9 +1356,10 @@ typedef struct _KUSER_SHARED_DATA { KSYSTEM_TIME FeatureConfigurationChangeStamp; /* 0x720 */ ULONG Spare; ULONG64 UserPointerAuthMask; /* 0x730 */ + BOOLEAN ProcessorFeaturesExtra[PROCESSOR_FEATURE_MAX_EXTRA - PROCESSOR_FEATURE_MAX];/* 0x738 */ } KUSER_SHARED_DATA, *PKUSER_SHARED_DATA;
-C_ASSERT( sizeof(KUSER_SHARED_DATA) == 0x738 ); +C_ASSERT( sizeof(KUSER_SHARED_DATA) == 0x758 );
#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_ENABLED 0x01 #define SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_HV_PAGE 0x02