Re: [PATCH] kernel32: implement GetLogicalProcessorInformation

24 Nov 2011

Hi,

On Nov 24, 2011, at 4:21 AM, Claudio Fontana wrote:
...
First implementation of GetLogicalProcessorInformation.
Limitations: all logical processors are added to the same NUMA set,
and all cores are added to the same package.
Only the linux-specific helper function is implemented, so for now
everybody else gets the fallback hard-coded results only.
The fallback is also triggered when the OS-specific APIs fail.
The linux-specific function is based on sysfs.
The new OS-specific functions can be easily added as additional
ifdefs by following the linux example.
---
dlls/kernel32/process.c         |  350 ++++++++++++++++++++++++++++++++++++++-
dlls/kernel32/tests/Makefile.in |    1 +
dlls/kernel32/tests/glpi.c      |   82 +++++++++
You should really implement this in ntdll.NtQuerySystemInformation (information class SystemLogicalProcessorInformation). In general, kernel32 forwards most of its implementation to ntdll, which reduces code duplication.
3 files changed, 430 insertions(+), 3 deletions(-)
create mode 100644 dlls/kernel32/tests/glpi.c
IMO, you should add the test to dlls/ntdll/tests/info.c, and test NtQuerySystemInformation() with info class SystemLogicalProcessorInformation.

diff --git a/dlls/kernel32/process.c b/dlls/kernel32/process.c
index b6c0b9d..de39244 100644
--- a/dlls/kernel32/process.c
+++ b/dlls/kernel32/process.c
@@ -51,6 +51,12 @@
#include "ntstatus.h"
#define WIN32_NO_STATUS
+
+#include "windef.h"
+#include "winbase.h"
+#include "winerror.h"
+#include "winnt.h"
+
#include "winternl.h"
#include "kernel_private.h"
#include "psapi.h"
@@ -3705,14 +3711,352 @@ HANDLE WINAPI GetCurrentProcess(void)
    return (HANDLE)~(ULONG_PTR)0;
}
+/***************
+ * glpi_impl_fb:
+ *   a fallback implementation of GetLogicalProcessorInformation
+ *   for the systems we do not support yet,
+ *   or for when the OS-specific API fails.
+ *   Same arguments and return value as glpi.
+ */
This comment should really come right before the function you're documenting. Also, it's formatted wrong. It should look more like this:
/***********************************************************************************
 *        logical_processor_info_fallback
 *
 * Fallback implementation of NtQuerySystemInformation() with info class
 * SystemLogicalProcessorInfo.
 */
...
+
+#undef SLPI
+#ifdef NONAMELESSUNION
+#define SLPI(buffer, idx) buffer[idx].DUMMYUNIONNAME
+#else
+#define SLPI(buffer, idx) buffer[idx]
+#endif /* NONAMELESSUNION */
You could avoid the ifdef if you used the U() macro. In fact, why are you defining a macro at all? Just use U(buffer[idx]).
+
+static BOOL
+glpi_impl_fb(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer, PDWORD pbuflen)
Surely you could use a more descriptive name. (See one example in my example doc comment.) This isn't 1978, you know.
+{
+    int glpi_n = 5;
+    FIXME("(%p,%p): reporting hard coded information\n", buffer, pbuflen);
+
+    if (*pbuflen < sizeof(*buffer) * glpi_n)
+    {
+	SetLastError(ERROR_INSUFFICIENT_BUFFER);
+	*pbuflen = sizeof(*buffer) * glpi_n;
+	return FALSE;
+    }
+
+    buffer[0].ProcessorMask = (ULONG_PTR)0x01;
+    buffer[1].ProcessorMask = (ULONG_PTR)0x01;
+    buffer[2].ProcessorMask = (ULONG_PTR)0x01;
+    buffer[3].ProcessorMask = (ULONG_PTR)0x01;
+    buffer[4].ProcessorMask = (ULONG_PTR)0x01;
These casts aren't necessary.
+
+    buffer[0].Relationship = RelationProcessorCore;
+    buffer[1].Relationship = RelationNumaNode;
+    buffer[2].Relationship = RelationCache;
+    buffer[3].Relationship = RelationCache;
+    buffer[4].Relationship = RelationProcessorPackage;
+
+    SLPI(buffer, 0).ProcessorCore.Flags = (BYTE)0x00;
+    SLPI(buffer, 1).NumaNode.NodeNumber = (DWORD)0x00;
Neither are these.
+
+    SLPI(buffer, 2).Cache.Level = (BYTE)0x01;
+    SLPI(buffer, 2).Cache.Associativity = (BYTE)0x08;
+    SLPI(buffer, 2).Cache.LineSize = (WORD)64;
+    SLPI(buffer, 2).Cache.Size = (DWORD)16 << 10; /* 16 KB */
Or these.
+    SLPI(buffer, 2).Cache.Type = CacheData;
+
+    SLPI(buffer, 3).Cache.Level = (BYTE)0x02;
+    SLPI(buffer, 3).Cache.Associativity = (BYTE)0x08;
+    SLPI(buffer, 3).Cache.LineSize = (WORD)64;
+    SLPI(buffer, 3).Cache.Size = (DWORD)1024 << 10; /* 1024 KB */
Or these.
+    SLPI(buffer, 3).Cache.Type = CacheUnified;
+    /* no additional info for processor package at buffer[4] needed. */
+
+    *pbuflen = sizeof(*buffer) * glpi_n;
+    return TRUE;
+}
+
+#ifdef linux
+/******************
+ * glpi_impl_linux:
+ *   an implementation of GetLogicalProcessorInformation
+ *   for linux sysfs.
+ * Returns: -1 on API failure (trigger fallback),
+ *          otherwise see glpi.
+ */
See above.
+
+#define SYS_CPU "/sys/devices/system/cpu/cpu"
+#define SYS_CPU_MAX 63
I'd criticize your use of a static limit, if only Windows didn't have that limit ;). Actually, you can only have 32 processors on a 32-bit system because a ULONG_PTR is 32 bits there.
+#define SYS_CPU_COREID "/topology/core_id"
+#define SYS_CPU_PACKAGEID "/topology/physical_package_id"
+#define SYS_CPU_CACHE "/cache/index"
+#define SYS_CPU_CACHE_MAX 12
+#define SYS_CPU_CACHE_LEVEL "/level"
+#define SYS_CPU_CACHE_SIZE "/size"
+#define SYS_CPU_CACHE_TYPE "/type"
+#define SYS_CPU_CACHE_LINE "/coherency_line_size"
+#define SYS_CPU_CACHE_ASSOC "/ways_of_associativity"
+
+static int glpi_impl_linux(SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buffer,
+			   PDWORD pbuflen)
+{
+    /* remember the glpi results, so that subsequent calls are faster */
+    static int n_cores = 0;
+    static int n_caches = 0;
+    static PSYSTEM_LOGICAL_PROCESSOR_INFORMATION cores = NULL;
+    static PSYSTEM_LOGICAL_PROCESSOR_INFORMATION caches = NULL;
+    static SYSTEM_LOGICAL_PROCESSOR_INFORMATION numa[1];
+    static SYSTEM_LOGICAL_PROCESSOR_INFORMATION package[1];
+
+    if (!n_cores)
+    {
+	int n_cpus = 0; int cpu_i;
+	char path[PATH_MAX];
+
+	TRACE("building new cached result.\n");
+
+	for (n_cpus = 0; n_cpus < SYS_CPU_MAX; n_cpus++)
+	{
+	    snprintf(path, sizeof(path), SYS_CPU "%d", n_cpus);
+	    if (access(path, R_OK | X_OK) != 0)
Don't use access(2). There's a big fat warning on the man-page telling you not to use it because of a race condition. Another way to solve the problem you're trying to solve might be to scan the "/sys/devices/system/cpu/" directory for the number of entries using the POSIX readdir(3) interface. You could scan them all up front, or you could take each one as you go, re-allocating the table for each new entry.
+		break;
+	}
+
+	TRACE("detected %d logical cpus.\n", n_cpus);
+	if (n_cpus < 1)
+	{
+	    TRACE("requesting fallback implementation.\n");
+	    return -1;
+	}
+
+	cores = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*cores) * n_cpus);
+	caches = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*caches) * 1);
+
+	for (cpu_i = 0; cpu_i < n_cpus; cpu_i++)
+	{
+	    FILE *f; int core_id; int i;
+	    snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_COREID, cpu_i);
+	    cores[n_cores].ProcessorMask = (ULONG_PTR)0x01 << cpu_i;
This cast might be needed if there are more than 32 CPUs, so that's OK.
+	    cores[n_cores].Relationship = RelationProcessorCore;
+
+	    if (!(f = fopen(path, "r")) || fscanf(f, "%d", &core_id) != 1)
+	    {
+		/* topology info not available, provide default cpuid=cpu_i */
+		ERR("could not read cpu%d topology.\n", cpu_i);
+		SLPI(cores, n_cores).Reserved[0] = cpu_i;
+		n_cores++;
+		if (f)
+		    fclose(f);
+		continue; /* next cpu */
+	    }
+
+	    /* normal sane case */
+	    fclose(f);
+	    /* store the core_id to catch cpus with same core_id */
+	    SLPI(cores, n_cores).Reserved[0] = core_id;
+
+	    /* look for this core_id in previous elements */
+	    for (i = 0; i < n_cores; i++)
+		if (SLPI(cores, i).Reserved[0] == SLPI(cores, n_cores).Reserved[0])
+		    break;
+
+	    if (i < n_cores)
+	    {
+		TRACE("found existing coreid %d for cpu%d.\n", core_id, cpu_i);
+		/* add this logical processor to the existing mask */
+		cores[i].ProcessorMask |= cores[n_cores].ProcessorMask;
+		/* do not advance n_cores: this element can be overwritten. */
+	    }
+	    else
+	    {
+		TRACE("found new coreid %d for cpu%d.\n", core_id, cpu_i);
+
+		/* look for caches */
+		for (i = 0; i < SYS_CPU_CACHE_MAX; i++)
+		{
+		    unsigned long value;
+		    char string[80];
+
+		    snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d",
+			     cpu_i, i);
+		    if (access(path, R_OK | X_OK) != 0)
Don't use access(2).
+			break; /* no more caches found. */
+
+		    caches = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
+					 caches, sizeof(*caches) * (n_caches + 1));
+
+		    /* we keep the index into the cores array to set
+		       the right value at the end. */
+		    caches[n_caches].ProcessorMask = (ULONG_PTR)n_cores;
This cast, however, is definitely not needed.
+		    caches[n_caches].Relationship = RelationCache;
+
+		    snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
+			     SYS_CPU_CACHE_LEVEL, cpu_i, i);
+		    if (!(f = fopen(path, "r")) || fscanf(f, "%lu", &value) != 1)
+			SLPI(caches, n_caches).Cache.Level = (BYTE)0x01;
Neither is this.
+		    else
+			SLPI(caches, n_caches).Cache.Level = (BYTE)value;
+		    if (f)
+			fclose(f);
+
+		    snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
+			     SYS_CPU_CACHE_ASSOC, cpu_i, i);
+		    if (!(f = fopen(path, "r")) || fscanf(f, "%lu", &value) != 1)
+			SLPI(caches, n_caches).Cache.Associativity = 0x08;
+		    else
+			SLPI(caches, n_caches).Cache.Associativity = (BYTE)value;
+		    if (f)
+			fclose(f);
+
+		    snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
+			     SYS_CPU_CACHE_LINE, cpu_i, i);
+		    if (!(f = fopen(path, "r")) || fscanf(f, "%lu", &value) != 1)
+			SLPI(caches, n_caches).Cache.LineSize = (WORD)64;
This cast is not needed.
+		    else
+			SLPI(caches, n_caches).Cache.LineSize = (WORD)value;
+		    if (f)
+			fclose(f);
+
+		    snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
+			     SYS_CPU_CACHE_SIZE, cpu_i, i);
+		    if (!(f = fopen(path, "r")) || fscanf(f, "%lu", &value) != 1)
+			SLPI(caches, n_caches).Cache.Size = (DWORD)16 << 10;
This cast isn't needed unless an int is less than 32 bits in size.
+		    else
+			SLPI(caches, n_caches).Cache.Size = (DWORD)value << 10;
+		    if (f)
+			fclose(f);
+
+		    snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
+			     SYS_CPU_CACHE_TYPE, cpu_i, i);
+		    if (!(f = fopen(path, "r")) || !fgets(string, sizeof(string), f))
+			SLPI(caches, n_caches).Cache.Type = CacheData;
+		    else switch (string[0])
+                    {
+		    case 'D':
+			SLPI(caches, n_caches).Cache.Type = CacheData; break;
+		    case 'I':
+			SLPI(caches, n_caches).Cache.Type = CacheInstruction; break;
+		    case 'U': default:
+			SLPI(caches, n_caches).Cache.Type =
+			    string[2] == 'i' ? CacheUnified : CacheTrace; break;
+		    }
+
+		    if (f)
+			fclose(f);
+
+		    n_caches++;
+		}
+		n_cores++;
+		TRACE("detected %d caches for coreid %d\n", i, core_id);
+	    }
+	}
+
+	if (n_caches < 1)
+	{
+	    HeapFree(GetProcessHeap(), 0, caches);
+	    caches = NULL;
+	}
+
+	/* trim leftovers */
+	cores = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
+			    cores, sizeof(*cores) * n_cores);
+
+	if (1) /* final cleanup work */
Why the if (1)?
+	{
+	    int i;
+	    /* set the real processor mask in the caches. */
+	    for (i = 0; i < n_caches; i++)
+	    {
+		int cores_i;
+		cores_i = caches[i].ProcessorMask;
+		caches[i].ProcessorMask = cores[cores_i].ProcessorMask;
+	    }
+
+	    /* remove the reserved values from the cores, set flags,
+	       build numa and package. */
+
+	    numa[0].ProcessorMask = 0x00;
+	    numa[0].Relationship = RelationNumaNode;
+	    SLPI(numa, 0).NumaNode.NodeNumber = 0x00;
+	    package[0].Relationship = RelationProcessorPackage;
+
+	    for (i = 0; i < n_cores; i++)
+	    {
+		SLPI(cores, i).Reserved[0] = 0;
+		SLPI(cores, i).ProcessorCore.Flags = 0x00; /* ?unclear */
+		numa[0].ProcessorMask |= cores[i].ProcessorMask;
+	    }
+
+	    package[0].ProcessorMask = numa[0].ProcessorMask;
+	}
+    }
+
+    TRACE("getting cached result.\n");
+
+    if (1) /* final check for required size, final result */
Same here.
+    {
+	DWORD required; required = sizeof(*cores) * (n_cores + n_caches + 2);
+	if (*pbuflen < required)
+	{
+	    TRACE("return with failure (ERROR_INSUFFICIENT_BUFFER).\n");
+	    SetLastError(ERROR_INSUFFICIENT_BUFFER);
+	    *pbuflen = required;
+	    return 0;
+	}
+
+	*pbuflen = required;
+
+	/* beware: buffer is SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buffer; */
+	memcpy(buffer, cores, sizeof(*cores) * n_cores);
+	buffer += n_cores;
+
+	if (n_caches > 0)
+	{
+	    memcpy(buffer, caches, sizeof(*caches) * n_caches);
+	    buffer += n_caches;
+	}
+
+	*buffer++ = package[0];
+	*buffer++ = numa[0];
+    }
+
+    TRACE("return success!\n");
Is this really necessary? IMO it's generally better to be quiet if you have nothing interesting to say (in typical Unix fashion). And a function succeeding isn't interesting.
+    return 1;
+}
+
+#undef SYS_CPU
+#undef SYS_CPU_MAX
+#undef SYS_CPU_COREID
+#undef SYS_CPU_PACKAGEID
+#undef SYS_CPU_CACHE
+#undef SYS_CPU_CACHE_MAX
+#undef SYS_CPU_CACHE_LEVEL
+#undef SYS_CPU_CACHE_SIZE
+#undef SYS_CPU_CACHE_TYPE
+#undef SYS_CPU_CACHE_LINE
+#undef SYS_CPU_CACHE_ASSOC
+
+#endif /* linux */
+
/***********************************************************************
 *           GetLogicalProcessorInformation     (KERNEL32.@)
 */
BOOL WINAPI GetLogicalProcessorInformation(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer, PDWORD pBufLen)
{
-    FIXME("(%p,%p): stub\n", buffer, pBufLen);
-    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
-    return FALSE;
+    int rv;
+
+    if (pBufLen == NULL || buffer == NULL)
+    {
+	SetLastError(ERROR_BAD_ARGUMENTS);
+	return FALSE;
+    }
+
+#if defined linux
+    rv = glpi_impl_linux(buffer, pBufLen);
+#else /* no supported OS found */
+    rv = -1;
+#endif
+
+    /* fallback hard-coded result */
+    if (rv < 0)
+	rv = glpi_impl_fb(buffer, pBufLen);
+
+    return rv;
}
/***********************************************************************
diff --git a/dlls/kernel32/tests/Makefile.in b/dlls/kernel32/tests/Makefile.in
index dce27db..1505944 100644
--- a/dlls/kernel32/tests/Makefile.in
+++ b/dlls/kernel32/tests/Makefile.in
@@ -17,6 +17,7 @@ C_SRCS = \
  file.c \
  format_msg.c \
  generated.c \
+	glpi.c \
  heap.c \
  loader.c \
  locale.c \
diff --git a/dlls/kernel32/tests/glpi.c b/dlls/kernel32/tests/glpi.c
new file mode 100644
index 0000000..a81d1d5
--- /dev/null
+++ b/dlls/kernel32/tests/glpi.c
Like I said, this should be integrated into ntdll:info instead of being its own test in kernel32. Note that the style of reporting errors is different between ntdll and kernel32. In kernel32, the function returns a BOOL, and if it's false, you have to check the LastError. In ntdll, however, functions simply return the status code directly. Also, ntdll functions return an NTSTATUS instead of a constant from <winerror.h>.
@@ -0,0 +1,82 @@
+#include <stdarg.h>
+#include <stdlib.h>
+#include <time.h>
+#include <stdio.h>
+
+#include "wine/test.h"
+#include "windef.h"
+#include "winbase.h"
+#include "winerror.h"
+
+static void test_glpi(void)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer[200];
+    DWORD buflen = sizeof(buffer);
+    BOOL ret; int i, n;
+
+    ret = GetLogicalProcessorInformation(NULL, &buflen);
+    ok(!ret, "Passing NULL as buffer should not be ok.\n");
+
+    ret = GetLogicalProcessorInformation(buffer, NULL);
+    ok(!ret, "Passing NULL as pbuflen should not be ok.\n");
+
+    ret = GetLogicalProcessorInformation(buffer, &buflen);
+    ok(ret, "Normal glpi call (%d)\n", GetLastError());
Don't call GetLastError() inside an ok(). (Actually, that's moot, because this test belongs in ntdll:process, but in general, you shouldn't call GetLastError() inside an ok().)
+
+    ret = GetLogicalProcessorInformation(buffer, &buflen);
+    ok(ret, "glpi call with the resulting buflen (%d)\n", buflen);
Showing the last error (ntdll: return status) here might help diagnosing any problems with this test. (But don't call GetLastError() inside the ok().)
+
+    buflen--;
+
+    ret = GetLogicalProcessorInformation(buffer, &buflen);
+    ok(!ret, "glpi call with insufficient buflen (%d)\n", buflen);
Here you should make sure the LastError is ERROR_INSUFFICIENT_BUFFER (corresponding NTSTATUS: STATUS_BUFFER_TOO_SMALL, or possibly STATUS_INVALID_BUFFER_SIZE), and not some other error.
+
+    ret = GetLogicalProcessorInformation(buffer, &buflen);
+    ok(ret, "glpi call with resulting buflen (%d)\n", buflen);
Trace the LastError (ntdll: return status) here, too.
+
+    n = buflen / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+
+    printf("glpi result: %d array elements.\n", n);
Don't use printf(3); use trace().
+
+#ifdef NONAMELESSUNION
+#define BUFFER_I buffer[i].u
+#else
+#define BUFFER_I buffer[i]
+#endif /* NONAMELESSUNION */
Like I said above, just use U(buffer[i]).
+
+    for (i = 0; i < n; i++)
+    {
+	switch (buffer[i].Relationship)
+	{
+	case RelationProcessorCore:
+	    printf("ProcessorCore idx=%03d mask=%lxh flags=%u\n",
+		   i, buffer[i].ProcessorMask, BUFFER_I.ProcessorCore.Flags);
+	    break;
+	case RelationNumaNode:
+	    printf("NumaNode      idx=%03d mask=%lxh noden=%u\n",
+		    i, buffer[i].ProcessorMask, BUFFER_I.NumaNode.NodeNumber);
+	    break;
+	case RelationCache:
+	    printf("Cache         idx=%03d mask=%lxh "
+		    "l=%u, a=%u, ls=%u, sz=%u, ty=%u\n",
+		   i, buffer[i].ProcessorMask,
+		   BUFFER_I.Cache.Level, BUFFER_I.Cache.Associativity,
+		   BUFFER_I.Cache.LineSize, BUFFER_I.Cache.Size,
+		   BUFFER_I.Cache.Type);
+	    break;
+	case RelationProcessorPackage:
+	    printf("ProcessorPack idx=%03d mask=%lxh\n",
+		   i, buffer[i].ProcessorMask);
For all of these, use trace() instead of printf(3).
+	    break;
+	default:
+	    ok(0, "invalid relationship %d", buffer[i].Relationship);
+	}
+    }
+#undef BUFFER_I
+}
+
+START_TEST(glpi)
+{
+    test_glpi();
+}
+
-- 
1.6.4
One general note about your patch: use spaces, not tabs. The rest of the file only uses tabs in doc comments.

Chip

    

Re: [PATCH] kernel32: implement GetLogicalProcessorInformation

Charles Davis