[PATCH 0/2] MR9395: arm64: Detect more new processor features.
This one is somewhat tricky, as the existing `ProcessorFeatures` array is limited to `PROCESSOR_FEATURE_MAX` aka 64 entries, while this now defines features up to index 88. To handle the higher number of features, this patch extends `KUSER_SHARED_DATA` with a separate second array for the later features. This requires updating an assert about its size. If there's a better place to store the excess feature bits, I'm open to suggestions. As far as I can see, there's no obvious ABI-visible place where they should be stored. (I also see that the upstream definition of `KUSER_SHARED_DATA` has been extended with another new field that Wine doesn't have - so having our own data there probably isn't ideal.) If there's some other convenient way of storing this (e.g. a global variable in ntdll), that would work too, but it would need to be accessible from both the PE and unix sides, so it's probably not very straightforward either. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/9395
From: Martin Storsjö <martin(a)martin.st> These were defined in newer versions of WinSDK 10.0.26100. --- include/winnt.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/winnt.h b/include/winnt.h index 423242ddcc4..ae5f54de022 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -1114,6 +1114,34 @@ typedef enum _HEAP_INFORMATION_CLASS { #define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58 #define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59 #define PF_BMI2_INSTRUCTIONS_AVAILABLE 60 +#define PF_MOVDIR64B_INSTRUCTION_AVAILABLE 61 +#define PF_ARM_LSE2_AVAILABLE 62 +#define PF_RESERVED_FEATURE 63 +#define PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE 64 +#define PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE 65 +#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66 +#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67 +#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68 +#define PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE 69 +#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70 +#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71 +#define PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE 72 +#define PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE 73 +#define PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE 74 +#define PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE 75 +#define PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE 76 +#define PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE 77 +#define PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE 78 +#define PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE 79 +#define PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE 80 +#define PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE 81 +#define PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE 82 +#define PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE 83 +#define PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE 84 +#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85 +#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86 +#define PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE 87 +#define PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE 88 /* Execution state flags */ -- GitLab 
https://gitlab.winehq.org/wine/wine/-/merge_requests/9395
From: Martin Storsjö <martin(a)martin.st> This requires extending the current ProcessorFeatures array somehow, as the primary one in KUSER_SHARED_DATA is limited to 64 entries. --- dlls/ntdll/signal_arm64.c | 6 +++++- dlls/ntdll/unix/system.c | 33 +++++++++++++++++++++++++++++++++ include/ddk/wdm.h | 5 ++++- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/dlls/ntdll/signal_arm64.c b/dlls/ntdll/signal_arm64.c index 9096f331dbc..ff568658b16 100644 --- a/dlls/ntdll/signal_arm64.c +++ b/dlls/ntdll/signal_arm64.c @@ -650,7 +650,11 @@ BOOLEAN WINAPI RtlIsProcessorFeaturePresent( UINT feature ) (1ull << PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE) | (1ull << PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE) | (1ull << PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE) | - (1ull << PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE); + (1ull << PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE) | + (1ull << PF_ARM_LSE2_AVAILABLE); + + if (feature >= PROCESSOR_FEATURE_MAX && feature < PROCESSOR_FEATURE_MAX_EXTRA) + return user_shared_data->ProcessorFeaturesExtra[feature - PROCESSOR_FEATURE_MAX]; return (feature < PROCESSOR_FEATURE_MAX && (arm64_features & (1ull << feature)) && user_shared_data->ProcessorFeatures[feature]); diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index 40b355174b9..a40af485b00 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -604,6 +604,7 @@ static ULONGLONG get_cpu_features(void) void init_shared_data_cpuinfo( KUSER_SHARED_DATA *data ) { BOOLEAN *features = data->ProcessorFeatures; + BOOLEAN *features_extra = data->ProcessorFeaturesExtra; #ifdef linux FILE *f = fopen("/proc/cpuinfo", "r"); @@ -645,6 +646,38 @@ void init_shared_data_cpuinfo( KUSER_SHARED_DATA *data ) features[PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE] = has_feature( value, "svei8mm" ); features[PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE] = has_feature( value, "svef32mm" ); features[PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE] = has_feature( value, "svef64mm" ); + 
features[PF_ARM_LSE2_AVAILABLE] = has_feature( value, "uscat" ); + +#define SET_FEATURE_EXTRA(id, string) \ + features_extra[id - PROCESSOR_FEATURE_MAX] = has_feature( value, string ) + + SET_FEATURE_EXTRA( PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE, "sha3" ); + SET_FEATURE_EXTRA( PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE, "sha512" ); + SET_FEATURE_EXTRA( PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE, "i8mm" ); + SET_FEATURE_EXTRA( PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE, "fphp" ); + SET_FEATURE_EXTRA( PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE, "bf16" ); + SET_FEATURE_EXTRA( PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE, "ebf16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_INSTRUCTIONS_AVAILABLE, "sme" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_INSTRUCTIONS_AVAILABLE, "sme2" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE, "sme2p1" ); + SET_FEATURE_EXTRA( PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE, "sme2p2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE, "smeaes" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE, "smesbitperm" ); + /* The PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE and + * PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE flags aren't exposed by + * the Linux kernel, see + * https://lists.infradead.org/pipermail/linux-arm-kernel/2025-January/991187.h... 
*/ + SET_FEATURE_EXTRA( PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE, "smesf8dp2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE, "smesf8dp4" ); + SET_FEATURE_EXTRA( PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE, "smesf8fma" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE, "smef8f32" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE, "smef8f16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE, "smef16f16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE, "smeb16b16" ); + SET_FEATURE_EXTRA( PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE, "smef64f64" ); + SET_FEATURE_EXTRA( PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE, "smei16i64" ); + SET_FEATURE_EXTRA( PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE, "smelutv2" ); + SET_FEATURE_EXTRA( PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE, "smefa64" ); break; } fclose( f ); diff --git a/include/ddk/wdm.h b/include/ddk/wdm.h index 9bbba22d2fe..6a4a43d400b 100644 --- a/include/ddk/wdm.h +++ b/include/ddk/wdm.h @@ -1231,6 +1231,8 @@ typedef enum _NT_PRODUCT_TYPE { #define PROCESSOR_FEATURE_MAX 64 +#define PROCESSOR_FEATURE_MAX_EXTRA 96 + typedef enum _ALTERNATIVE_ARCHITECTURE_TYPE { StandardDesign, @@ -1354,9 +1356,10 @@ typedef struct _KUSER_SHARED_DATA { KSYSTEM_TIME FeatureConfigurationChangeStamp; /* 0x720 */ ULONG Spare; ULONG64 UserPointerAuthMask; /* 0x730 */ + BOOLEAN ProcessorFeaturesExtra[PROCESSOR_FEATURE_MAX_EXTRA - PROCESSOR_FEATURE_MAX];/* 0x738 */ } KUSER_SHARED_DATA, *PKUSER_SHARED_DATA; -C_ASSERT( sizeof(KUSER_SHARED_DATA) == 0x738 ); +C_ASSERT( sizeof(KUSER_SHARED_DATA) == 0x758 ); #define SHARED_GLOBAL_FLAGS_QPC_BYPASS_ENABLED 0x01 #define SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_HV_PAGE 0x02 -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/9395
I merged the easy parts in a7c7f538e658ff6b6a3889e87aa7c13907eb4035, the rest will need more thought, I don't think we want to add private extensions to KUSER_SHARED_DATA. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/9395#note_121923
On Thu Nov 13 21:51:32 2025 +0000, Alexandre Julliard wrote:
I merged the easy parts in a7c7f538e658ff6b6a3889e87aa7c13907eb4035, the rest will need more thought, I don't think we want to add private extensions to KUSER_SHARED_DATA. Thanks! Yes, private extensions to KUSER_SHARED_DATA are probably not a good idea.
If there's a better place we can stash it, that would definitely be preferable. As far as I've been able to tell from poking around, there is no publicly accessible place where it should be stored, so any private storage within ntdll should be fine - we just have the challenge of accessing the same data from both the PE and unix sides. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/9395#note_121957
participants (2)
-
Alexandre Julliard (@julliard) -
Martin Storsjö