Hi,
This patch series was motivated by a bug exposed by earlier logical core parsing I added this summer (bug 45453).
The fix was simple, but I added a new helper function as more parts of the code use similar code.
While adding the fix, I noticed how our Linux CPU parsing logic will really fail on high core count (>32 or >64). These core counts are becoming more and more common. Consumer AMD Threadripper now has 64 logical cores (maybe >64 next year) and in the server space you can already easily go above on dual socket systems.
Supporting high core count, doesn't look trivial to me. The CPU parsing logic is quite fragile as it is. We need to break info better down into multiple groups (hard coded to 1, right now) based on NUMA nodes. Wineserver changes likely needed as well for thread affinity. Many others like needed e.g. the classic GetLogicalProcessorInformation supposedly reports info just for the "group" the thread is currently assigned to as it can't handle >32 / >64 cores.
All in all some fun improvements, which warrant a FIXME. For users who currently go above the limit, we are probably reaching undefined behavior right now...
Thanks, Roderick
Roderick Colenbrander (3): ntdll: parse sysfs cpu_shared_map using helper function. ntdll: Fix parsing thread_siblings bitmaps on high core count systems. ntdll: print FIXME on systems supporting more CPU cores than supported.
dlls/ntdll/nt.c | 103 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 17 deletions(-)
Signed-off-by: Roderick Colenbrander thunderbird2k@gmail.com --- dlls/ntdll/nt.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-)
diff --git a/dlls/ntdll/nt.c b/dlls/ntdll/nt.c index aa6f84625e..0e11e0eacd 100644 --- a/dlls/ntdll/nt.c +++ b/dlls/ntdll/nt.c @@ -1604,6 +1604,36 @@ static inline BOOL logical_proc_info_add_group(SYSTEM_LOGICAL_PROCESSOR_INFORMAT }
#ifdef linux +/* Helper function for counting bitmap values as commonly used by the Linux kernel + * for storing CPU masks in sysfs. The format is comma separated lists of hex values + * each max 32-bit e.g. "00ff" or even "00,00000000,0000ffff". + * + * Example files include: + * - /sys/devices/system/cpu/cpu0/cache/index0/shared_cpu_map + * - /sys/devices/system/cpu/cpu0/topology/thread_siblings + */ +static BOOL sysfs_parse_bitmap(const char *filename, ULONG_PTR * const mask) +{ + FILE *f; + DWORD r; + + f = fopen(filename, "r"); + if (!f) + return FALSE; + + while (!feof(f)) + { + char op; + if (!fscanf(f, "%x%c ", &r, &op)) + break; + + *mask = (sizeof(ULONG_PTR)>sizeof(int) ? *mask<<(8*sizeof(DWORD)) : 0) + r; + } + + fclose(f); + return TRUE; +} + /* for 'data', max_len is the array count. for 'dataex', max_len is in bytes */ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION **data, SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX **dataex, DWORD *max_len) @@ -1693,15 +1723,7 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION ** ULONG_PTR mask = 0;
sprintf(name, cache_info, i, j, "shared_cpu_map"); - f = fopen(name, "r"); - if(!f) continue; - while(!feof(f)) - { - if(!fscanf(f, "%x%c ", &r, &op)) - break; - mask = (sizeof(ULONG_PTR)>sizeof(int) ? mask<<(8*sizeof(DWORD)) : 0) + r; - } - fclose(f); + if(!sysfs_parse_bitmap(name, &mask)) continue;
sprintf(name, cache_info, i, j, "level"); f = fopen(name, "r");
Linux compiled for a high number of CPU cores, the bitmap is broken into sections of 32-bit by commas. Handle parsing this.
Wine-bug: https://bugs.winehq.org/show_bug.cgi?id=45453 Signed-off-by: Roderick Colenbrander thunderbird2k@gmail.com --- dlls/ntdll/nt.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/dlls/ntdll/nt.c b/dlls/ntdll/nt.c index 0e11e0eacd..db51a988d5 100644 --- a/dlls/ntdll/nt.c +++ b/dlls/ntdll/nt.c @@ -1704,13 +1704,8 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION **
/* Mask of logical threads sharing same physical core in kernel core numbering. */ sprintf(name, core_info, i, "thread_siblings"); - f = fopen(name, "r"); - if(f) - { - fscanf(f, "%lx", &thread_mask); - fclose(f); - } - else thread_mask = 1<<i; + if(!sysfs_parse_bitmap(name, &thread_mask)) + thread_mask = 1<<i; if(!logical_proc_info_add_by_id(data, dataex, &len, max_len, RelationProcessorCore, phys_core, thread_mask)) { fclose(fcpu_list);
Signed-off-by: Roderick Colenbrander thunderbird2k@gmail.com --- dlls/ntdll/nt.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-)
diff --git a/dlls/ntdll/nt.c b/dlls/ntdll/nt.c index db51a988d5..dcf9123409 100644 --- a/dlls/ntdll/nt.c +++ b/dlls/ntdll/nt.c @@ -1634,6 +1634,44 @@ static BOOL sysfs_parse_bitmap(const char *filename, ULONG_PTR * const mask) return TRUE; }
+/* Helper function for counting number of elements in interval lists as used by + * the Linux kernel. The format is comma separated list of intervals of which + * each interval has the format of "begin-end" where begin and end are decimal + * numbers. E.g. "0-7", "0-7,16-23" + * + * Example files include: + * - /sys/devices/system/cpu/online + * - /sys/devices/system/cpu/cpu0/cache/index0/shared_cpu_list + * - /sys/devices/system/cpu/cpu0/topology/thread_siblings_list. + */ +static BOOL sysfs_count_list_elements(const char *filename, DWORD *result) +{ + FILE *f; + + f = fopen(filename, "r"); + if (!f) + return FALSE; + + while (!feof(f)) + { + char op; + DWORD beg, end; + + if (!fscanf(f, "%u%c ", &beg, &op)) + break; + + if(op == '-') + fscanf(f, "%u%c ", &end, &op); + else + end = beg; + + *result += end - beg + 1; + } + + fclose(f); + return TRUE; +} + /* for 'data', max_len is the array count. for 'dataex', max_len is in bytes */ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION **data, SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX **dataex, DWORD *max_len) @@ -1643,10 +1681,24 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION ** static const char numa_info[] = "/sys/devices/system/node/node%u/cpumap";
FILE *fcpu_list, *fnuma_list, *f; - DWORD len = 0, beg, end, i, j, r, num_cpus = 0; + DWORD len = 0, beg, end, i, j, r, num_cpus = 0, max_cpus = 0; char op, name[MAX_PATH]; ULONG_PTR all_cpus_mask = 0;
+ /* On systems with a large number of CPU cores (32 or 64 depending on 32-bit or 64-bit), + * we have issues parsing processor information: + * - ULONG_PTR masks as used in data structures can't hold all cores. Requires splitting + * data appropriately into "processor groups". We are hard coding 1. + * - Thread affinity code in wineserver and our CPU parsing code here work independently. + * So far the Windows mask applied directly to Linux, but process groups break that. + * (NUMA systems you may have multiple non-full groups.) + */ + if(sysfs_count_list_elements("/sys/devices/system/cpu/present", &max_cpus) && max_cpus > MAXIMUM_PROCESSORS) + { + FIXME("Improve CPU info reporting: system supports %u logical cores, but only %u supported!\n", + max_cpus, MAXIMUM_PROCESSORS); + } + fcpu_list = fopen("/sys/devices/system/cpu/online", "r"); if(!fcpu_list) return STATUS_NOT_IMPLEMENTED;