[PATCH v3 0/2] MR10466: ntdll: Consider cgroup CPU limits in init_cpu_info().
On Linux, `init_cpu_info()` uses `sysconf(_SC_NPROCESSORS_ONLN)` which returns the host machine's total CPU count, ignoring cgroup CPU bandwidth limits. This causes applications that scale thread counts based on `GetSystemInfo().dwNumberOfProcessors` to over-subscribe CPUs in container environments (Docker, Kubernetes).

This MR reads cgroup v2 `cpu.max` and cgroup v1 `cpu.cfs_quota_us`/`cpu.cfs_period_us` to determine the effective CPU limit, and clamps the reported processor count accordingly. For cgroup v2, it walks up the hierarchy to find the most restrictive quota. It falls back to `sysconf` when no cgroup limit is configured.

GNU coreutils 9.8 (September 2025) added similar cgroup v2 awareness to `nproc`: https://github.com/coreutils/coreutils/issues/108

Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=59556

## Verification

Verified with Wine built from this branch running inside Docker containers (cgroup v2).

**Environment:** Docker on Linux (4-core host), Wine 11.5 with this patch applied.

### Without CPU limit

```
$ cat /sys/fs/cgroup/cpu.max
max 100000
$ wine cmd /c "echo %NUMBER_OF_PROCESSORS%"
4
```

All 4 host CPUs are reported. No override is applied when `cpu.max` is unlimited.

### With `--cpus=1.4`

```
$ cat /sys/fs/cgroup/cpu.max
140000 100000
$ wine cmd /c "echo %NUMBER_OF_PROCESSORS%"
1
```

### With `--cpus=1.5`

```
$ cat /sys/fs/cgroup/cpu.max
150000 100000
$ wine cmd /c "echo %NUMBER_OF_PROCESSORS%"
2
```

### With `--cpus=2.0`

```
$ cat /sys/fs/cgroup/cpu.max
200000 100000
$ wine cmd /c "echo %NUMBER_OF_PROCESSORS%"
2
```

### With `--cpus=2.4`

```
$ cat /sys/fs/cgroup/cpu.max
240000 100000
$ wine cmd /c "echo %NUMBER_OF_PROCESSORS%"
2
```

### With `--cpus=2.5`

```
$ cat /sys/fs/cgroup/cpu.max
250000 100000
$ wine cmd /c "echo %NUMBER_OF_PROCESSORS%"
3
```

The rounding uses `(double)quota / period + 0.5`, which rounds to the nearest integer (halves round up). For example, `--cpus=1.4` rounds down to 1, while `--cpus=1.5` rounds up to 2.
-- v3: ntdll: Use system_cpu_mask in get_system_affinity_mask() when available. https://gitlab.winehq.org/wine/wine/-/merge_requests/10466
From: Hiroki Awata <castaneai@castaneai.net> On Linux, init_cpu_info() uses sysconf(_SC_NPROCESSORS_ONLN) which returns the host machine's total CPU count, ignoring cgroup CPU bandwidth limits. This causes applications that scale thread counts based on GetSystemInfo().dwNumberOfProcessors to over-subscribe CPUs in container environments (Docker, Kubernetes). Read cgroup v2 cpu.max and cgroup v1 cpu.cfs_quota_us/period_us to determine the effective CPU limit, and clamp the reported processor count accordingly. For cgroup v2, walk up the hierarchy to find the most restrictive quota. Fall back to sysconf when no cgroup limit is configured. Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=59556 Signed-off-by: Hiroki Awata <castaneai@castaneai.net> --- dlls/ntdll/unix/system.c | 149 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index aa66ec2654c..a864454159a 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -1623,6 +1623,145 @@ static void get_random( void *buf, ULONG len ) #endif } +#ifdef linux +static long read_cgroup_cpu_limit(void) +{ + char line[512], path[512], buf[1024], data[512]; + FILE *f; + long quota, period; + long result = 0; + char *s, *last_sep; + + /* Try cgroup v2 first: walk up the hierarchy for the lowest quota */ + f = fopen("/proc/self/cgroup", "r"); + if (f) + { + while (fgets(line, sizeof(line), f)) + { + if (!strncmp(line, "0::", 3)) + { + if ((s = strchr(line + 3, '\n'))) *s = 0; + snprintf(path, sizeof(path), "%s", line + 3); + fclose(f); + + while (*path) + { + snprintf(buf, sizeof(buf), "/sys/fs/cgroup%s/cpu.max", path); + f = fopen(buf, "r"); + if (f) + { + if (fgets(data, sizeof(data), f) && strncmp(data, "max", 3) != 0) + { + quota = strtol(data, &s, 10); + if (*s == ' ') + { + period = strtol(s + 1, NULL, 10); + if (period > 0) + { + long ncpus = (long)((double)quota / period + 0.5); + if (ncpus < 1) ncpus = 1; + if 
(!result || ncpus < result) result = ncpus; + } + } + } + fclose(f); + if (result == 1) return 1; + } + + last_sep = strrchr(path, '/'); + if (!last_sep) break; + if (last_sep == path && *(path + 1)) + *(path + 1) = 0; /* iterate on "/" too */ + else + *last_sep = 0; + } + + return result; + } + } + fclose(f); + } + + /* cgroup v1 fallback */ + f = fopen("/proc/self/cgroup", "r"); + if (!f) return 0; + + path[0] = 0; + while (fgets(line, sizeof(line), f)) + { + /* look for a line containing "cpu" controller, e.g. "4:cpu,cpuacct:/path" or "4:cpu:/path" */ + char *controllers, *cgroup_path; + s = strchr(line, ':'); + if (!s) continue; + controllers = s + 1; + s = strchr(controllers, ':'); + if (!s) continue; + *s = 0; + cgroup_path = s + 1; + if ((s = strchr(cgroup_path, '\n'))) *s = 0; + + /* check if "cpu" is one of the controllers */ + s = controllers; + while (*s) + { + char *end = s; + while (*end && *end != ',') end++; + if (end - s == 3 && !strncmp(s, "cpu", 3)) + { + snprintf(path, sizeof(path), "%s", cgroup_path); + break; + } + s = *end ? 
end + 1 : end; + } + if (path[0]) break; + } + fclose(f); + + if (!path[0]) return 0; + + /* try cpu,cpuacct mount point first, then cpu */ + snprintf(buf, sizeof(buf), "/sys/fs/cgroup/cpu,cpuacct%s/cpu.cfs_quota_us", path); + f = fopen(buf, "r"); + if (!f) + { + snprintf(buf, sizeof(buf), "/sys/fs/cgroup/cpu%s/cpu.cfs_quota_us", path); + f = fopen(buf, "r"); + } + if (!f) return 0; + + if (!fgets(buf, sizeof(buf), f)) + { + fclose(f); + return 0; + } + fclose(f); + quota = strtol(buf, NULL, 10); + if (quota <= 0) return 0; + + /* read period */ + snprintf(buf, sizeof(buf), "/sys/fs/cgroup/cpu,cpuacct%s/cpu.cfs_period_us", path); + f = fopen(buf, "r"); + if (!f) + { + snprintf(buf, sizeof(buf), "/sys/fs/cgroup/cpu%s/cpu.cfs_period_us", path); + f = fopen(buf, "r"); + } + if (!f) return 0; + + if (!fgets(buf, sizeof(buf), f)) + { + fclose(f); + return 0; + } + fclose(f); + period = strtol(buf, NULL, 10); + if (period <= 0) return 0; + + result = (long)((double)quota / period + 0.5); + return result < 1 ? 1 : result; +} +#endif + /****************************************************************** * init_cpu_info * @@ -1652,6 +1791,16 @@ void init_cpu_info(void) #else num = 1; FIXME("Detecting the number of processors is not supported.\n"); +#endif +#ifdef linux + { + long cgroup_cpus = read_cgroup_cpu_limit(); + if (cgroup_cpus > 0 && cgroup_cpus < num) + { + TRACE( "limiting to %ld CPUs from cgroup\n", cgroup_cpus ); + num = cgroup_cpus; + } + } #endif peb->NumberOfProcessors = num; init_cpu_model(); -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10466
From: Hiroki Awata <castaneai@castaneai.net> get_system_affinity_mask() computes the affinity mask from peb->NumberOfProcessors. When NumberOfProcessors is reduced by cgroup CPU limits, this produces a mask that does not reflect the actual CPUs available to the process. Use system_cpu_mask from the logical processor info instead, which represents the real hardware topology. Fall back to NumberOfProcessors when system_cpu_mask is not yet initialized. Signed-off-by: Hiroki Awata <castaneai@castaneai.net> --- dlls/ntdll/unix/system.c | 2 +- dlls/ntdll/unix/unix_private.h | 1 + dlls/ntdll/unix/virtual.c | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index a864454159a..4edf04c9842 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -259,7 +259,7 @@ static SYSTEM_LOGICAL_PROCESSOR_INFORMATION *logical_proc_info; static unsigned int logical_proc_info_len, logical_proc_info_alloc_len; static SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *logical_proc_info_ex; static unsigned int logical_proc_info_ex_size, logical_proc_info_ex_alloc_size; -static ULONG_PTR system_cpu_mask; +ULONG_PTR system_cpu_mask; static pthread_mutex_t timezone_mutex = PTHREAD_MUTEX_INITIALIZER; diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index a9e8f522892..ebb44a9f63d 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -277,6 +277,7 @@ extern NTSTATUS system_time_precise( void *args ); extern void *anon_mmap_fixed( void *start, size_t size, int prot, int flags ); extern void *anon_mmap_alloc( size_t size, int prot ); extern void virtual_init(void); +extern ULONG_PTR system_cpu_mask; extern ULONG_PTR get_system_affinity_mask(void); extern void virtual_get_system_info( SYSTEM_BASIC_INFORMATION *info, BOOL wow64 ); extern NTSTATUS virtual_map_builtin_module( HANDLE mapping, void **module, SIZE_T *size, diff --git a/dlls/ntdll/unix/virtual.c 
b/dlls/ntdll/unix/virtual.c index e3fe3311928..a400744f6e6 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -3722,7 +3722,10 @@ void virtual_init(void) */ ULONG_PTR get_system_affinity_mask(void) { - ULONG num_cpus = peb->NumberOfProcessors; + ULONG num_cpus; + + if (system_cpu_mask) return system_cpu_mask; + num_cpus = peb->NumberOfProcessors; if (num_cpus >= sizeof(ULONG_PTR) * 8) return ~(ULONG_PTR)0; return ((ULONG_PTR)1 << num_cpus) - 1; } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10466
Unless there is evidence that Windows does it this way, I don't think we want NumberOfProcessors to be inconsistent with the rest of the CPU information. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10466#note_134259
participants (3)
- Alexandre Julliard (@julliard)
- Hiroki Awata
- Hiroki Awata (@castaneai)