Hi all,
Recently I was helping a user troubleshoot some Ryzen performance issues. As part of that still unresolved bug (Bugzilla 43277), I found out or CPU core reporting code on Linux is not correct.
The summary is that we report all logical cores as if they were physical CPU cores. Applications also can't determine whether they are dealing with SMT / HyperThreaded cores. Essentially applications think there are twice the number of physical cores.
The issue can easily be observed through various sample applications: - Sysinternals core info application uses GetLogicalProcessorInformationEx: https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo - Example application on GetLogicalProcessorInformation: https://msdn.microsoft.com/en-us/library/windows/desktop/ms683194(v=vs.85).a...
The root cause of the issue is in how we enumerated CPU cores. We do so by walking all logical cores in sysfs (/sys/devices/system/cpuX). Each core we report as a physical core. We are not parsing the 'thread_siblings' file, which tells us the mask of logical cores sharing a physical core. This causes us to count cores twice. This happens both on AMD and Intel.
The patch series adjusts how we enumerate logical cores on Linux by parsing 'thread_siblings' and making sure we don't add a new physical core if a sibling earlier added a physical core for the same mask and package. The cache parsing logic does something similar by parsing 'shared_cpu_map' and logical_proc_add_cache_info ignores already reported caches as well.
In addition, we are now also reporting SMT capabilities to Flags of GetLogicalProcessorInformation(Ex).
Thanks, Roderick
Roderick Colenbrander (4): ntdll: Derive number of logical CPU cores from core mask. ntdll: Report logical cores per physical core through mask. ntdll: GetLogicalProcessorInformationEx report LTP_PC_SMT for SMT cores. ntdll: GetLogicalProcessorInformation report LPT_PC_SMT for SMT cores.
dlls/ntdll/nt.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- include/winnt.h | 2 ++ 2 files changed, 42 insertions(+), 10 deletions(-)
Signed-off-by: Roderick Colenbrander thunderbird2k@gmail.com --- dlls/ntdll/nt.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/nt.c b/dlls/ntdll/nt.c index dc0ce04f42..e6458b98f2 100644 --- a/dlls/ntdll/nt.c +++ b/dlls/ntdll/nt.c @@ -1511,6 +1511,17 @@ static inline BOOL logical_proc_info_add_group(SYSTEM_LOGICAL_PROCESSOR_INFORMAT }
#ifdef linux +static DWORD count_bits(ULONG_PTR mask) +{ + DWORD count = 0; + while (mask > 0) + { + mask >>= 1; + count++; + } + return count; +} + /* for 'data', max_len is the array count. for 'dataex', max_len is in bytes */ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION **data, SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX **dataex, DWORD *max_len) @@ -1643,7 +1654,6 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION ** for(i=0; i<len; i++){ if((*data)[i].Relationship == RelationProcessorCore){ all_cpus_mask |= (*data)[i].ProcessorMask; - ++num_cpus; } } }else{ @@ -1651,11 +1661,11 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION ** SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infoex = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)(((char *)*dataex) + i); if(infoex->Relationship == RelationProcessorCore){ all_cpus_mask |= infoex->u.Processor.GroupMask[0].Mask; - ++num_cpus; } i += infoex->Size; } } + num_cpus = count_bits(all_cpus_mask);
fnuma_list = fopen("/sys/devices/system/node/online", "r"); if(!fnuma_list)
Signed-off-by: Roderick Colenbrander thunderbird2k@gmail.com --- dlls/ntdll/nt.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/dlls/ntdll/nt.c b/dlls/ntdll/nt.c index e6458b98f2..0b69f7de44 100644 --- a/dlls/ntdll/nt.c +++ b/dlls/ntdll/nt.c @@ -1325,8 +1325,16 @@ static inline BOOL logical_proc_info_add_by_id(SYSTEM_LOGICAL_PROCESSOR_INFORMAT (*pdata)[i].ProcessorMask |= mask; return TRUE; } - }else - i = *len; + } + else + { + for(i=0; i<*len; i++) + { + if ((*pdata)[i].Relationship==rel && (*pdata)[i].ProcessorMask==mask && + (*pdata)[i].u.Reserved[1]==id) + return TRUE; + } + }
while(*len == *pmax_len) { @@ -1352,6 +1360,11 @@ static inline BOOL logical_proc_info_add_by_id(SYSTEM_LOGICAL_PROCESSOR_INFORMAT dataex->u.Processor.GroupMask[0].Mask |= mask; return TRUE; } + else if (rel == RelationProcessorCore && dataex->Relationship == rel && + dataex->u.Processor.GroupMask[0].Mask == mask && dataex->u.Processor.Reserved[1] == id) + { + return TRUE; + } ofs += dataex->Size; }
@@ -1548,6 +1561,8 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION **
for(i=beg; i<=end; i++) { + ULONG_PTR thread_mask = 0; + if(i > 8*sizeof(ULONG_PTR)) { FIXME("skipping logical processor %d\n", i); @@ -1568,15 +1583,16 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION ** return STATUS_NO_MEMORY; }
- sprintf(name, core_info, i, "core_id"); + /* Mask of logical threads sharing same physical core. */ + sprintf(name, core_info, i, "thread_siblings"); f = fopen(name, "r"); if(f) { - fscanf(f, "%u", &r); + fscanf(f, "%lx", &thread_mask); fclose(f); } - else r = i; - if(!logical_proc_info_add_by_id(data, dataex, &len, max_len, RelationProcessorCore, r, (ULONG_PTR)1 << i)) + else thread_mask = 1<<i; + if(!logical_proc_info_add_by_id(data, dataex, &len, max_len, RelationProcessorCore, r, thread_mask)) { fclose(fcpu_list); return STATUS_NO_MEMORY;
Please still review the series for overall. I just realized though the code can be slightly simplified if I still submit 'core_id' to logical_proc_info_add_by_id. The lookup code within that function doesn't have to parse the masks (ProcessorMask or GroupMask[0]) and it is cleaner and more future proof that way.
Thanks, Roderick
On Wed, Jun 13, 2018 at 10:48 PM, Roderick Colenbrander thunderbird2k@gmail.com wrote:
Signed-off-by: Roderick Colenbrander thunderbird2k@gmail.com
dlls/ntdll/nt.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/dlls/ntdll/nt.c b/dlls/ntdll/nt.c index e6458b98f2..0b69f7de44 100644 --- a/dlls/ntdll/nt.c +++ b/dlls/ntdll/nt.c @@ -1325,8 +1325,16 @@ static inline BOOL logical_proc_info_add_by_id(SYSTEM_LOGICAL_PROCESSOR_INFORMAT (*pdata)[i].ProcessorMask |= mask; return TRUE; }
}else
i = *len;
}
else
{
for(i=0; i<*len; i++)
{
if ((*pdata)[i].Relationship==rel && (*pdata)[i].ProcessorMask==mask &&
(*pdata)[i].u.Reserved[1]==id)
return TRUE;
}
} while(*len == *pmax_len) {
@@ -1352,6 +1360,11 @@ static inline BOOL logical_proc_info_add_by_id(SYSTEM_LOGICAL_PROCESSOR_INFORMAT dataex->u.Processor.GroupMask[0].Mask |= mask; return TRUE; }
else if (rel == RelationProcessorCore && dataex->Relationship == rel &&
dataex->u.Processor.GroupMask[0].Mask == mask && dataex->u.Processor.Reserved[1] == id)
{
return TRUE;
} ofs += dataex->Size; }
@@ -1548,6 +1561,8 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION **
for(i=beg; i<=end; i++) {
ULONG_PTR thread_mask = 0;
if(i > 8*sizeof(ULONG_PTR)) { FIXME("skipping logical processor %d\n", i);
@@ -1568,15 +1583,16 @@ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION ** return STATUS_NO_MEMORY; }
sprintf(name, core_info, i, "core_id");
/* Mask of logical threads sharing same physical core. */
sprintf(name, core_info, i, "thread_siblings"); f = fopen(name, "r"); if(f) {
fscanf(f, "%u", &r);
fscanf(f, "%lx", &thread_mask); fclose(f); }
else r = i;
if(!logical_proc_info_add_by_id(data, dataex, &len, max_len, RelationProcessorCore, r, (ULONG_PTR)1 << i))
else thread_mask = 1<<i;
if(!logical_proc_info_add_by_id(data, dataex, &len, max_len, RelationProcessorCore, r, thread_mask)) { fclose(fcpu_list); return STATUS_NO_MEMORY;
-- 2.14.4
Signed-off-by: Roderick Colenbrander thunderbird2k@gmail.com --- dlls/ntdll/nt.c | 27 +++++++++++++++------------ include/winnt.h | 2 ++ 2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/dlls/ntdll/nt.c b/dlls/ntdll/nt.c index 0b69f7de44..9d50ad1891 100644 --- a/dlls/ntdll/nt.c +++ b/dlls/ntdll/nt.c @@ -1309,6 +1309,17 @@ static DWORD log_proc_ex_size_plus(DWORD size) return sizeof(LOGICAL_PROCESSOR_RELATIONSHIP) + sizeof(DWORD) + size; }
+static DWORD count_bits(ULONG_PTR mask) +{ + DWORD count = 0; + while (mask > 0) + { + mask >>= 1; + count++; + } + return count; +} + static inline BOOL logical_proc_info_add_by_id(SYSTEM_LOGICAL_PROCESSOR_INFORMATION **pdata, SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX **pdataex, DWORD *len, DWORD *pmax_len, LOGICAL_PROCESSOR_RELATIONSHIP rel, DWORD id, ULONG_PTR mask) @@ -1381,7 +1392,10 @@ static inline BOOL logical_proc_info_add_by_id(SYSTEM_LOGICAL_PROCESSOR_INFORMAT
dataex->Relationship = rel; dataex->Size = log_proc_ex_size_plus(sizeof(PROCESSOR_RELATIONSHIP)); - dataex->u.Processor.Flags = 0; /* TODO */ + if (rel == RelationProcessorCore) + dataex->u.Processor.Flags = count_bits(mask) > 1 ? LTP_PC_SMT : 0; + else + dataex->u.Processor.Flags = 0; dataex->u.Processor.EfficiencyClass = 0; dataex->u.Processor.GroupCount = 1; dataex->u.Processor.GroupMask[0].Mask = mask; @@ -1524,17 +1538,6 @@ static inline BOOL logical_proc_info_add_group(SYSTEM_LOGICAL_PROCESSOR_INFORMAT }
#ifdef linux -static DWORD count_bits(ULONG_PTR mask) -{ - DWORD count = 0; - while (mask > 0) - { - mask >>= 1; - count++; - } - return count; -} - /* for 'data', max_len is the array count. for 'dataex', max_len is in bytes */ static NTSTATUS create_logical_proc_info(SYSTEM_LOGICAL_PROCESSOR_INFORMATION **data, SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX **dataex, DWORD *max_len) diff --git a/include/winnt.h b/include/winnt.h index 54bf11dabd..862cc45291 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -5982,6 +5982,8 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP RelationAll = 0xffff } LOGICAL_PROCESSOR_RELATIONSHIP;
+#define LTP_PC_SMT 0x1 + typedef enum _PROCESSOR_CACHE_TYPE { CacheUnified,
Signed-off-by: Roderick Colenbrander thunderbird2k@gmail.com --- dlls/ntdll/nt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/dlls/ntdll/nt.c b/dlls/ntdll/nt.c index 9d50ad1891..f866091c38 100644 --- a/dlls/ntdll/nt.c +++ b/dlls/ntdll/nt.c @@ -1355,7 +1355,8 @@ static inline BOOL logical_proc_info_add_by_id(SYSTEM_LOGICAL_PROCESSOR_INFORMAT
(*pdata)[i].Relationship = rel; (*pdata)[i].ProcessorMask = mask; - /* TODO: set processor core flags */ + if (rel == RelationProcessorCore) + (*pdata)[i].u.ProcessorCore.Flags = count_bits(mask) > 1 ? LTP_PC_SMT : 0; (*pdata)[i].u.Reserved[0] = 0; (*pdata)[i].u.Reserved[1] = id; *len = i+1;