Hi,
On Nov 24, 2011, at 4:21 AM, Claudio Fontana wrote:
> First implementation of GetLogicalProcessorInformation.
> Limitations: all logical processors are added to the same NUMA set,
> and all cores are added to the same package.
> Only the linux-specific helper function is implemented, so for now
> everybody else gets the fallback hard-coded results only.
> The fallback is also triggered when the OS-specific APIs fail.
>
> The linux-specific function is based on sysfs.
>
> The new OS-specific functions can be easily added as additional
> ifdefs by following the linux example.
> ---
> dlls/kernel32/process.c | 350 ++++++++++++++++++++++++++++++++++++++-
> dlls/kernel32/tests/Makefile.in | 1 +
> dlls/kernel32/tests/glpi.c | 82 +++++++++
You should really implement this in ntdll.NtQuerySystemInformation (information class SystemLogicalProcessorInformation). In general, kernel32 forwards most of its implementation to ntdll, which reduces code duplication.
> 3 files changed, 430 insertions(+), 3 deletions(-)
> create mode 100644 dlls/kernel32/tests/glpi.c
IMO, you should add the test to dlls/ntdll/tests/info.c, and test NtQuerySystemInformation() with info class SystemLogicalProcessorInformation.
>
> diff --git a/dlls/kernel32/process.c b/dlls/kernel32/process.c
> index b6c0b9d..de39244 100644
> --- a/dlls/kernel32/process.c
> +++ b/dlls/kernel32/process.c
> @@ -51,6 +51,12 @@
>
> #include "ntstatus.h"
> #define WIN32_NO_STATUS
> +
> +#include "windef.h"
> +#include "winbase.h"
> +#include "winerror.h"
> +#include "winnt.h"
> +
> #include "winternl.h"
> #include "kernel_private.h"
> #include "psapi.h"
> @@ -3705,14 +3711,352 @@ HANDLE WINAPI GetCurrentProcess(void)
> return (HANDLE)~(ULONG_PTR)0;
> }
>
> +/***************
> + * glpi_impl_fb:
> + * a fallback implementation of GetLogicalProcessorInformation
> + * for the systems we do not support yet,
> + * or for when the OS-specific API fails.
> + * Same arguments and return value as glpi.
> + */
This comment should really come right before the function you're documenting. Also, it's formatted wrong. It should look more like this:
/***********************************************************************************
* logical_processor_info_fallback
*
* Fallback implementation of NtQuerySystemInformation() with info class
* SystemLogicalProcessorInfo.
*/
> +
> +#undef SLPI
> +#ifdef NONAMELESSUNION
> +#define SLPI(buffer, idx) buffer[idx].DUMMYUNIONNAME
> +#else
> +#define SLPI(buffer, idx) buffer[idx]
> +#endif /* NONAMELESSUNION */
You could avoid the ifdef if you used the U() macro. In fact, why are you defining a macro at all? Just use U(buffer[idx]).
> +
> +static BOOL
> +glpi_impl_fb(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer, PDWORD pbuflen)
Surely you could use a more descriptive name. (See one example in my example doc comment.) This isn't 1978, you know.
> +{
> + int glpi_n = 5;
> + FIXME("(%p,%p): reporting hard coded information\n", buffer, pbuflen);
> +
> + if (*pbuflen < sizeof(*buffer) * glpi_n)
> + {
> + SetLastError(ERROR_INSUFFICIENT_BUFFER);
> + *pbuflen = sizeof(*buffer) * glpi_n;
> + return FALSE;
> + }
> +
> + buffer[0].ProcessorMask = (ULONG_PTR)0x01;
> + buffer[1].ProcessorMask = (ULONG_PTR)0x01;
> + buffer[2].ProcessorMask = (ULONG_PTR)0x01;
> + buffer[3].ProcessorMask = (ULONG_PTR)0x01;
> + buffer[4].ProcessorMask = (ULONG_PTR)0x01;
These casts aren't necessary.
> +
> + buffer[0].Relationship = RelationProcessorCore;
> + buffer[1].Relationship = RelationNumaNode;
> + buffer[2].Relationship = RelationCache;
> + buffer[3].Relationship = RelationCache;
> + buffer[4].Relationship = RelationProcessorPackage;
> +
> + SLPI(buffer, 0).ProcessorCore.Flags = (BYTE)0x00;
> + SLPI(buffer, 1).NumaNode.NodeNumber = (DWORD)0x00;
Neither are these.
> +
> + SLPI(buffer, 2).Cache.Level = (BYTE)0x01;
> + SLPI(buffer, 2).Cache.Associativity = (BYTE)0x08;
> + SLPI(buffer, 2).Cache.LineSize = (WORD)64;
> + SLPI(buffer, 2).Cache.Size = (DWORD)16 << 10; /* 16 KB */
Or these.
> + SLPI(buffer, 2).Cache.Type = CacheData;
> +
> + SLPI(buffer, 3).Cache.Level = (BYTE)0x02;
> + SLPI(buffer, 3).Cache.Associativity = (BYTE)0x08;
> + SLPI(buffer, 3).Cache.LineSize = (WORD)64;
> + SLPI(buffer, 3).Cache.Size = (DWORD)1024 << 10; /* 1024 KB */
Or these.
> + SLPI(buffer, 3).Cache.Type = CacheUnified;
> + /* no additional info for processor package at buffer[4] needed. */
> +
> + *pbuflen = sizeof(*buffer) * glpi_n;
> + return TRUE;
> +}
> +
> +#ifdef linux
> +/******************
> + * glpi_impl_linux:
> + * an implementation of GetLogicalProcessorInformation
> + * for linux sysfs.
> + * Returns: -1 on API failure (trigger fallback),
> + * otherwise see glpi.
> + */
See above.
> +
> +#define SYS_CPU "/sys/devices/system/cpu/cpu"
> +#define SYS_CPU_MAX 63
I'd criticize your use of a static limit, if only Windows didn't have that limit ;). Actually, you can only have 32 processors on a 32-bit system because a ULONG_PTR is 32 bits there.
> +#define SYS_CPU_COREID "/topology/core_id"
> +#define SYS_CPU_PACKAGEID "/topology/physical_package_id"
> +#define SYS_CPU_CACHE "/cache/index"
> +#define SYS_CPU_CACHE_MAX 12
> +#define SYS_CPU_CACHE_LEVEL "/level"
> +#define SYS_CPU_CACHE_SIZE "/size"
> +#define SYS_CPU_CACHE_TYPE "/type"
> +#define SYS_CPU_CACHE_LINE "/coherency_line_size"
> +#define SYS_CPU_CACHE_ASSOC "/ways_of_associativity"
> +
> +static int glpi_impl_linux(SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buffer,
> + PDWORD pbuflen)
> +{
> + /* remember the glpi results, so that subsequent calls are faster */
> + static int n_cores = 0;
> + static int n_caches = 0;
> + static PSYSTEM_LOGICAL_PROCESSOR_INFORMATION cores = NULL;
> + static PSYSTEM_LOGICAL_PROCESSOR_INFORMATION caches = NULL;
> + static SYSTEM_LOGICAL_PROCESSOR_INFORMATION numa[1];
> + static SYSTEM_LOGICAL_PROCESSOR_INFORMATION package[1];
> +
> + if (!n_cores)
> + {
> + int n_cpus = 0; int cpu_i;
> + char path[PATH_MAX];
> +
> + TRACE("building new cached result.\n");
> +
> + for (n_cpus = 0; n_cpus < SYS_CPU_MAX; n_cpus++)
> + {
> + snprintf(path, sizeof(path), SYS_CPU "%d", n_cpus);
> + if (access(path, R_OK | X_OK) != 0)
Don't use access(2). There's a big fat warning on the man-page telling you not to use it because of a race condition. Another way to solve the problem you're trying to solve might be to scan the "/sys/devices/system/cpu/" directory for the number of entries using the POSIX readdir(3) interface. You could scan them all up front, or you could take each one as you go, re-allocating the table for each new entry.
> + break;
> + }
> +
> + TRACE("detected %d logical cpus.\n", n_cpus);
> + if (n_cpus < 1)
> + {
> + TRACE("requesting fallback implementation.\n");
> + return -1;
> + }
> +
> + cores = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*cores) * n_cpus);
> + caches = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*caches) * 1);
> +
> + for (cpu_i = 0; cpu_i < n_cpus; cpu_i++)
> + {
> + FILE *f; int core_id; int i;
> + snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_COREID, cpu_i);
> + cores[n_cores].ProcessorMask = (ULONG_PTR)0x01 << cpu_i;
This cast might be needed if there are more than 32 CPUs, so that's OK.
> + cores[n_cores].Relationship = RelationProcessorCore;
> +
> + if (!(f = fopen(path, "r")) || fscanf(f, "%d", &core_id) != 1)
> + {
> + /* topology info not available, provide default cpuid=cpu_i */
> + ERR("could not read cpu%d topology.\n", cpu_i);
> + SLPI(cores, n_cores).Reserved[0] = cpu_i;
> + n_cores++;
> + if (f)
> + fclose(f);
> + continue; /* next cpu */
> + }
> +
> + /* normal sane case */
> + fclose(f);
> + /* store the core_id to catch cpus with same core_id */
> + SLPI(cores, n_cores).Reserved[0] = core_id;
> +
> + /* look for this core_id in previous elements */
> + for (i = 0; i < n_cores; i++)
> + if (SLPI(cores, i).Reserved[0] == SLPI(cores, n_cores).Reserved[0])
> + break;
> +
> + if (i < n_cores)
> + {
> + TRACE("found existing coreid %d for cpu%d.\n", core_id, cpu_i);
> + /* add this logical processor to the existing mask */
> + cores[i].ProcessorMask |= cores[n_cores].ProcessorMask;
> + /* do not advance n_cores: this element can be overwritten. */
> + }
> + else
> + {
> + TRACE("found new coreid %d for cpu%d.\n", core_id, cpu_i);
> +
> + /* look for caches */
> + for (i = 0; i < SYS_CPU_CACHE_MAX; i++)
> + {
> + unsigned long value;
> + char string[80];
> +
> + snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d",
> + cpu_i, i);
> + if (access(path, R_OK | X_OK) != 0)
Don't use access(2).
> + break; /* no more caches found. */
> +
> + caches = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
> + caches, sizeof(*caches) * (n_caches + 1));
> +
> + /* we keep the index into the cores array to set
> + the right value at the end. */
> + caches[n_caches].ProcessorMask = (ULONG_PTR)n_cores;
This cast, however, is definitely not needed.
> + caches[n_caches].Relationship = RelationCache;
> +
> + snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
> + SYS_CPU_CACHE_LEVEL, cpu_i, i);
> + if (!(f = fopen(path, "r")) || fscanf(f, "%lu", &value) != 1)
> + SLPI(caches, n_caches).Cache.Level = (BYTE)0x01;
Neither is this.
> + else
> + SLPI(caches, n_caches).Cache.Level = (BYTE)value;
> + if (f)
> + fclose(f);
> +
> + snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
> + SYS_CPU_CACHE_ASSOC, cpu_i, i);
> + if (!(f = fopen(path, "r")) || fscanf(f, "%lu", &value) != 1)
> + SLPI(caches, n_caches).Cache.Associativity = 0x08;
> + else
> + SLPI(caches, n_caches).Cache.Associativity = (BYTE)value;
> + if (f)
> + fclose(f);
> +
> + snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
> + SYS_CPU_CACHE_LINE, cpu_i, i);
> + if (!(f = fopen(path, "r")) || fscanf(f, "%lu", &value) != 1)
> + SLPI(caches, n_caches).Cache.LineSize = (WORD)64;
This cast is not needed.
> + else
> + SLPI(caches, n_caches).Cache.LineSize = (WORD)value;
> + if (f)
> + fclose(f);
> +
> + snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
> + SYS_CPU_CACHE_SIZE, cpu_i, i);
> + if (!(f = fopen(path, "r")) || fscanf(f, "%lu", &value) != 1)
> + SLPI(caches, n_caches).Cache.Size = (DWORD)16 << 10;
This cast isn't needed unless an int is less than 32 bits in size.
> + else
> + SLPI(caches, n_caches).Cache.Size = (DWORD)value << 10;
> + if (f)
> + fclose(f);
> +
> + snprintf(path, sizeof(path), SYS_CPU "%d" SYS_CPU_CACHE "%d"
> + SYS_CPU_CACHE_TYPE, cpu_i, i);
> + if (!(f = fopen(path, "r")) || !fgets(string, sizeof(string), f))
> + SLPI(caches, n_caches).Cache.Type = CacheData;
> + else switch (string[0])
> + {
> + case 'D':
> + SLPI(caches, n_caches).Cache.Type = CacheData; break;
> + case 'I':
> + SLPI(caches, n_caches).Cache.Type = CacheInstruction; break;
> + case 'U': default:
> + SLPI(caches, n_caches).Cache.Type =
> + string[2] == 'i' ? CacheUnified : CacheTrace; break;
> + }
> +
> + if (f)
> + fclose(f);
> +
> + n_caches++;
> + }
> + n_cores++;
> + TRACE("detected %d caches for coreid %d\n", i, core_id);
> + }
> + }
> +
> + if (n_caches < 1)
> + {
> + HeapFree(GetProcessHeap(), 0, caches);
> + caches = NULL;
> + }
> +
> + /* trim leftovers */
> + cores = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
> + cores, sizeof(*cores) * n_cores);
> +
> + if (1) /* final cleanup work */
Why the if (1)?
> + {
> + int i;
> + /* set the real processor mask in the caches. */
> + for (i = 0; i < n_caches; i++)
> + {
> + int cores_i;
> + cores_i = caches[i].ProcessorMask;
> + caches[i].ProcessorMask = cores[cores_i].ProcessorMask;
> + }
> +
> + /* remove the reserved values from the cores, set flags,
> + build numa and package. */
> +
> + numa[0].ProcessorMask = 0x00;
> + numa[0].Relationship = RelationNumaNode;
> + SLPI(numa, 0).NumaNode.NodeNumber = 0x00;
> + package[0].Relationship = RelationProcessorPackage;
> +
> + for (i = 0; i < n_cores; i++)
> + {
> + SLPI(cores, i).Reserved[0] = 0;
> + SLPI(cores, i).ProcessorCore.Flags = 0x00; /* ?unclear */
> + numa[0].ProcessorMask |= cores[i].ProcessorMask;
> + }
> +
> + package[0].ProcessorMask = numa[0].ProcessorMask;
> + }
> + }
> +
> + TRACE("getting cached result.\n");
> +
> + if (1) /* final check for required size, final result */
Same here.
> + {
> + DWORD required; required = sizeof(*cores) * (n_cores + n_caches + 2);
> + if (*pbuflen < required)
> + {
> + TRACE("return with failure (ERROR_INSUFFICIENT_BUFFER).\n");
> + SetLastError(ERROR_INSUFFICIENT_BUFFER);
> + *pbuflen = required;
> + return 0;
> + }
> +
> + *pbuflen = required;
> +
> + /* beware: buffer is SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buffer; */
> + memcpy(buffer, cores, sizeof(*cores) * n_cores);
> + buffer += n_cores;
> +
> + if (n_caches > 0)
> + {
> + memcpy(buffer, caches, sizeof(*caches) * n_caches);
> + buffer += n_caches;
> + }
> +
> + *buffer++ = package[0];
> + *buffer++ = numa[0];
> + }
> +
> + TRACE("return success!\n");
Is this really necessary? IMO it's generally better to be quiet if you have nothing interesting to say (in typical Unix fashion). And a function succeeding isn't interesting.
> + return 1;
> +}
> +
> +#undef SYS_CPU
> +#undef SYS_CPU_MAX
> +#undef SYS_CPU_COREID
> +#undef SYS_CPU_PACKAGEID
> +#undef SYS_CPU_CACHE
> +#undef SYS_CPU_CACHE_MAX
> +#undef SYS_CPU_CACHE_LEVEL
> +#undef SYS_CPU_CACHE_SIZE
> +#undef SYS_CPU_CACHE_TYPE
> +#undef SYS_CPU_CACHE_LINE
> +#undef SYS_CPU_CACHE_ASSOC
> +
> +#endif /* linux */
> +
> /***********************************************************************
> * GetLogicalProcessorInformation (KERNEL32.@)
> */
> BOOL WINAPI GetLogicalProcessorInformation(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer, PDWORD pBufLen)
> {
> - FIXME("(%p,%p): stub\n", buffer, pBufLen);
> - SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
> - return FALSE;
> + int rv;
> +
> + if (pBufLen == NULL || buffer == NULL)
> + {
> + SetLastError(ERROR_BAD_ARGUMENTS);
> + return FALSE;
> + }
> +
> +#if defined linux
> + rv = glpi_impl_linux(buffer, pBufLen);
> +#else /* no supported OS found */
> + rv = -1;
> +#endif
> +
> + /* fallback hard-coded result */
> + if (rv < 0)
> + rv = glpi_impl_fb(buffer, pBufLen);
> +
> + return rv;
> }
>
> /***********************************************************************
> diff --git a/dlls/kernel32/tests/Makefile.in b/dlls/kernel32/tests/Makefile.in
> index dce27db..1505944 100644
> --- a/dlls/kernel32/tests/Makefile.in
> +++ b/dlls/kernel32/tests/Makefile.in
> @@ -17,6 +17,7 @@ C_SRCS = \
> file.c \
> format_msg.c \
> generated.c \
> + glpi.c \
> heap.c \
> loader.c \
> locale.c \
> diff --git a/dlls/kernel32/tests/glpi.c b/dlls/kernel32/tests/glpi.c
> new file mode 100644
> index 0000000..a81d1d5
> --- /dev/null
> +++ b/dlls/kernel32/tests/glpi.c
Like I said, this should be integrated into ntdll:info instead of being its own test in kernel32. Note that the style of reporting errors is different between ntdll and kernel32. In kernel32, the function returns a BOOL, and if it's false, you have to check the LastError. In ntdll, however, functions simply return the status code directly. Also, ntdll functions return an NTSTATUS instead of a constant from <winerror.h>.
> @@ -0,0 +1,82 @@
> +#include <stdarg.h>
> +#include <stdlib.h>
> +#include <time.h>
> +#include <stdio.h>
> +
> +#include "wine/test.h"
> +#include "windef.h"
> +#include "winbase.h"
> +#include "winerror.h"
> +
> +static void test_glpi(void)
> +{
> + SYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer[200];
> + DWORD buflen = sizeof(buffer);
> + BOOL ret; int i, n;
> +
> + ret = GetLogicalProcessorInformation(NULL, &buflen);
> + ok(!ret, "Passing NULL as buffer should not be ok.\n");
> +
> + ret = GetLogicalProcessorInformation(buffer, NULL);
> + ok(!ret, "Passing NULL as pbuflen should not be ok.\n");
> +
> + ret = GetLogicalProcessorInformation(buffer, &buflen);
> + ok(ret, "Normal glpi call (%d)\n", GetLastError());
Don't call GetLastError() inside an ok(). (Actually, that's moot, because this test belongs in ntdll:process, but in general, you shouldn't call GetLastError() inside an ok().)
> +
> + ret = GetLogicalProcessorInformation(buffer, &buflen);
> + ok(ret, "glpi call with the resulting buflen (%d)\n", buflen);
Showing the last error (ntdll: return status) here might help diagnosing any problems with this test. (But don't call GetLastError() inside the ok().)
> +
> + buflen--;
> +
> + ret = GetLogicalProcessorInformation(buffer, &buflen);
> + ok(!ret, "glpi call with insufficient buflen (%d)\n", buflen);
Here you should make sure the LastError is ERROR_INSUFFICIENT_BUFFER (corresponding NTSTATUS: STATUS_BUFFER_TOO_SMALL, or possibly STATUS_INVALID_BUFFER_SIZE), and not some other error.
> +
> + ret = GetLogicalProcessorInformation(buffer, &buflen);
> + ok(ret, "glpi call with resulting buflen (%d)\n", buflen);
Trace the LastError (ntdll: return status) here, too.
> +
> + n = buflen / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
> +
> + printf("glpi result: %d array elements.\n", n);
Don't use printf(3); use trace().
> +
> +#ifdef NONAMELESSUNION
> +#define BUFFER_I buffer[i].u
> +#else
> +#define BUFFER_I buffer[i]
> +#endif /* NONAMELESSUNION */
Like I said above, just use U(buffer[i]).
> +
> + for (i = 0; i < n; i++)
> + {
> + switch (buffer[i].Relationship)
> + {
> + case RelationProcessorCore:
> + printf("ProcessorCore idx=%03d mask=%lxh flags=%u\n",
> + i, buffer[i].ProcessorMask, BUFFER_I.ProcessorCore.Flags);
> + break;
> + case RelationNumaNode:
> + printf("NumaNode idx=%03d mask=%lxh noden=%u\n",
> + i, buffer[i].ProcessorMask, BUFFER_I.NumaNode.NodeNumber);
> + break;
> + case RelationCache:
> + printf("Cache idx=%03d mask=%lxh "
> + "l=%u, a=%u, ls=%u, sz=%u, ty=%u\n",
> + i, buffer[i].ProcessorMask,
> + BUFFER_I.Cache.Level, BUFFER_I.Cache.Associativity,
> + BUFFER_I.Cache.LineSize, BUFFER_I.Cache.Size,
> + BUFFER_I.Cache.Type);
> + break;
> + case RelationProcessorPackage:
> + printf("ProcessorPack idx=%03d mask=%lxh\n",
> + i, buffer[i].ProcessorMask);
For all of these, use trace() instead of printf(3).
> + break;
> + default:
> + ok(0, "invalid relationship %d", buffer[i].Relationship);
> + }
> + }
> +#undef BUFFER_I
> +}
> +
> +START_TEST(glpi)
> +{
> + test_glpi();
> +}
> +
> --
> 1.6.4
>
>
>
One general note about your patch: use spaces, not tabs. The rest of the file only uses tabs in doc comments.
Chip