A much cleaner result (written by hand) that we can reuse where needed later (_i.e._ `GetNumaProcessorNode` which also crashes with [a similar MAV](https://gist.github.com/wasertech/f894ce8d6250e72a01a861c0e4eb6064) on multi-node systems).
Took me a while to understand where I should put everything, not even sure I got it right. Let me know.
I thought I was going to need `FileNumaNodeInformation` but turns out it's not really needed. I can remove it if you want.
This is what I see when I try to read the node count:
```log
0024:fixme:ntdll:init_numa_info node affinity; using node 0.
GetNumaHighestNodeNumber: 1
```
I only see this fixme if I try to access `FILE_NUMA_NODE_INFORMATION`. I would really prefer such a behavior for our compatibility layer on any multi-node system.
A big thanks to @besentv and @zfigura for their invaluable feedback on this.
From: wasertech danny@waser.tech
--- dlls/kernelbase/memory.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/dlls/kernelbase/memory.c b/dlls/kernelbase/memory.c index c0a3d65d341..dfa4c434d2b 100644 --- a/dlls/kernelbase/memory.c +++ b/dlls/kernelbase/memory.c @@ -1582,8 +1582,46 @@ BOOL WINAPI SetProcessDefaultCpuSets(HANDLE process, const ULONG *cpu_set_ids, U */ BOOL WINAPI DECLSPEC_HOTPATCH GetNumaHighestNodeNumber( ULONG *node ) { - FIXME( "semi-stub: %p\n", node ); + NTSTATUS nts; + DWORD rel = RelationNumaNode; + DWORD iex_len = 0; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *iex = NULL; + unsigned int i; + + if (!node) { SetLastError( ERROR_INVALID_PARAMETER ); return FALSE; } *node = 0; + + set_ntstatus( NtQuerySystemInformationEx( SystemLogicalProcessorInformationEx, + &rel, sizeof(rel), NULL, 0, &iex_len ) ); + if (!iex_len) + { FIXME( "stub: iex_len %lu, node %lu\n", iex_len, *node ); return TRUE; } + + iex = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, iex_len ); + if (!iex) { FIXME( "stub: iex %p, node %lu\n", iex, *node ); return TRUE; } + + nts = set_ntstatus( NtQuerySystemInformationEx( SystemLogicalProcessorInformationEx, + &rel, sizeof(rel), iex, iex_len, &iex_len ) ); + if (!nts) + { + FIXME( "stub: nts %lu, node %lu\n", nts, *node ); + RtlFreeHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, iex ); + return TRUE; + } + + for ( i = 0; i < iex_len; ) + { + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ex = ( SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX * ) + ( (char *)iex + i ); + if (!ex->Size) break; + if (ex->Relationship == RelationNumaNode) + { + if (ex->NumaNode.NodeNumber > *node) + *node = ex->NumaNode.NodeNumber; + } + i += ex->Size; + } + + RtlFreeHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, iex ); return TRUE; }
From: wasertech danny@waser.tech
--- dlls/kernel32/tests/process.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)
diff --git a/dlls/kernel32/tests/process.c b/dlls/kernel32/tests/process.c index 7984f3d6fb4..fe2644c73e1 100644 --- a/dlls/kernel32/tests/process.c +++ b/dlls/kernel32/tests/process.c @@ -79,6 +79,7 @@ static BOOL (WINAPI *pQueryInformationJobObject)(HANDLE job, JOBOBJECTINFOCLAS static BOOL (WINAPI *pSetInformationJobObject)(HANDLE job, JOBOBJECTINFOCLASS class, LPVOID info, DWORD len); static HANDLE (WINAPI *pCreateIoCompletionPort)(HANDLE file, HANDLE existing_port, ULONG_PTR key, DWORD threads); static BOOL (WINAPI *pGetNumaProcessorNode)(UCHAR, PUCHAR); +static BOOL (WINAPI *pGetNumaHighestNodeNumber)(ULONG); static NTSTATUS (WINAPI *pNtQueryInformationProcess)(HANDLE, PROCESSINFOCLASS, PVOID, ULONG, PULONG); static NTSTATUS (WINAPI *pNtQueryInformationThread)(HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG); static NTSTATUS (WINAPI *pNtQuerySystemInformationEx)(SYSTEM_INFORMATION_CLASS, void*, ULONG, void*, ULONG, ULONG*); @@ -269,6 +270,7 @@ static BOOL init(void) pSetInformationJobObject = (void *)GetProcAddress(hkernel32, "SetInformationJobObject"); pCreateIoCompletionPort = (void *)GetProcAddress(hkernel32, "CreateIoCompletionPort"); pGetNumaProcessorNode = (void *)GetProcAddress(hkernel32, "GetNumaProcessorNode"); + pGetNumaHighestNodeNumber = (void *)GetProcAddress(hkernel32, "GetNumaHighestNodeNumber"); pWTSGetActiveConsoleSessionId = (void *)GetProcAddress(hkernel32, "WTSGetActiveConsoleSessionId"); pCreateToolhelp32Snapshot = (void *)GetProcAddress(hkernel32, "CreateToolhelp32Snapshot"); pProcess32First = (void *)GetProcAddress(hkernel32, "Process32First"); @@ -4087,6 +4089,24 @@ static void test_GetNumaProcessorNode(void) } }
+static void test_GetNumaHighestNodeNumber(void) +{ + BOOL could_find_nodes; + ULONG node_count; + + if (!pGetNumaHighestNodeNumber) + { + win_skip("GetNumaHighestNodeNumber is missing\n"); + return; + } + + node_count = 0xFF; + could_find_nodes = pGetNumaHighestNodeNumber(&node_count); + ok(could_find_nodes, "GetNumaHighestNodeNumber returned FALSE\n"); + ok(node_count <= 64 && node_count != 0xFF && node_count >= 0, + "GetNumaHighestNodeNumber returned invalid node %u\n", node_count); +} + static void test_session_info(void) { DWORD session_id, active_session;
From: wasertech danny@waser.tech
--- dlls/ntdll/unix/file.c | 8 +++++++- dlls/ntdll/unix/system.c | 41 ++++++++++++++++++++++++++++++++++++++++ include/winternl.h | 6 ++++++ 3 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/dlls/ntdll/unix/file.c b/dlls/ntdll/unix/file.c index 958ae9a6937..13d4cfafd8c 100644 --- a/dlls/ntdll/unix/file.c +++ b/dlls/ntdll/unix/file.c @@ -125,6 +125,8 @@ WINE_DEFAULT_DEBUG_CHANNEL(file); WINE_DECLARE_DEBUG_CHANNEL(winediag);
+BOOL init_numa_info(FILE_NUMA_NODE_INFORMATION *info); + #define MAX_DOS_DRIVES 26
/* just in case... */ @@ -4583,7 +4585,7 @@ NTSTATUS WINAPI NtQueryInformationFile( HANDLE handle, IO_STATUS_BLOCK *io, 0, /* FileIdGlobalTxDirectoryInformation */ 0, /* FileIsRemoteDeviceInformation */ 0, /* FileAttributeCacheInformation */ - 0, /* FileNumaNodeInformation */ + sizeof(FILE_NUMA_NODE_INFORMATION), /* FileNumaNodeInformation */ 0, /* FileStandardLinkInformation */ 0, /* FileRemoteProtocolInformation */ 0, /* FileRenameInformationBypassAccessCheck */ @@ -4735,6 +4737,10 @@ NTSTATUS WINAPI NtQueryInformationFile( HANDLE handle, IO_STATUS_BLOCK *io, info->ReparseTag = IO_REPARSE_TAG_MOUNT_POINT; } break; + case FileNumaNodeInformation: + FILE_NUMA_NODE_INFORMATION *ni = (FILE_NUMA_NODE_INFORMATION *)ptr; + if (!init_numa_info(ni)) status = STATUS_NOT_IMPLEMENTED; + status = STATUS_SUCCESS; break; case FileStatInformation: if (fd_get_file_info( fd, options, &st, &attr ) == -1) status = errno_to_status( errno ); else if (!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode)) diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index 342e443f5e7..bf61f179849 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -1404,6 +1404,33 @@ static void init_logical_proc_info(void) init_tsc_frequency(); }
+BOOL init_numa_info(FILE_NUMA_NODE_INFORMATION *info) +{ + unsigned int i; + + if (!info) return FALSE; + pthread_once(&logical_proc_init_once, init_logical_proc_info); + + info->HighestNodeNumber = 0; + info->NodeNumber = 0; + info->Reserved = 0; + + if (!logical_proc_info_len || !logical_proc_info) + return TRUE; + + for (i = 0; i < logical_proc_info_len; ++i) + { + if (logical_proc_info[i].Relationship == RelationNumaNode) + { + if (logical_proc_info[i].NumaNode.NodeNumber > info->HighestNodeNumber) + info->HighestNodeNumber = logical_proc_info[i].NumaNode.NodeNumber; + } + } + + info->NodeNumber = 0; + return TRUE; +} + /****************************************************************** * init_cpu_info * @@ -3885,6 +3912,20 @@ NTSTATUS WINAPI NtQuerySystemInformationEx( SYSTEM_INFORMATION_CLASS class, break; }
+ case SystemNumaProximityNodeInformation: + { + FILE_NUMA_NODE_INFORMATION *numa_info = info; + + len = sizeof(FILE_NUMA_NODE_INFORMATION); + if (size < len) { ret = STATUS_BUFFER_TOO_SMALL; break; } + if (!info) return STATUS_ACCESS_VIOLATION; + if (!init_numa_info(numa_info)) + ret = STATUS_NOT_IMPLEMENTED; + + ret = STATUS_SUCCESS; + break; + } + default: FIXME( "(0x%08x,%p,%u,%p,%u,%p) stub\n", class, query, query_len, info, size, ret_size ); break; diff --git a/include/winternl.h b/include/winternl.h index 0432bb6d2bd..898c0305f8a 100644 --- a/include/winternl.h +++ b/include/winternl.h @@ -1790,6 +1790,12 @@ typedef struct _FILE_STAT_INFORMATION { ULONG EffectiveAccess; } FILE_STAT_INFORMATION, *PFILE_STAT_INFORMATION;
+typedef struct _FILE_NUMA_NODE_INFORMATION { + ULONG HighestNodeNumber; + ULONG NodeNumber; + ULONG Reserved; +} FILE_NUMA_NODE_INFORMATION, *PFILE_NUMA_NODE_INFORMATION; + typedef struct _FILE_IO_COMPLETION_NOTIFICATION_INFORMATION { ULONG Flags; } FILE_IO_COMPLETION_NOTIFICATION_INFORMATION, *PFILE_IO_COMPLETION_NOTIFICATION_INFORMATION;
From: wasertech danny@waser.tech
--- dlls/kernelbase/memory.c | 43 +++++++--------------------------------- 1 file changed, 7 insertions(+), 36 deletions(-)
diff --git a/dlls/kernelbase/memory.c b/dlls/kernelbase/memory.c index dfa4c434d2b..d1f828114dd 100644 --- a/dlls/kernelbase/memory.c +++ b/dlls/kernelbase/memory.c @@ -1582,46 +1582,17 @@ BOOL WINAPI SetProcessDefaultCpuSets(HANDLE process, const ULONG *cpu_set_ids, U */ BOOL WINAPI DECLSPEC_HOTPATCH GetNumaHighestNodeNumber( ULONG *node ) { - NTSTATUS nts; - DWORD rel = RelationNumaNode; - DWORD iex_len = 0; - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *iex = NULL; - unsigned int i; + FILE_NUMA_NODE_INFORMATION ni;
if (!node) { SetLastError( ERROR_INVALID_PARAMETER ); return FALSE; } - *node = 0; - - set_ntstatus( NtQuerySystemInformationEx( SystemLogicalProcessorInformationEx, - &rel, sizeof(rel), NULL, 0, &iex_len ) ); - if (!iex_len) - { FIXME( "stub: iex_len %lu, node %lu\n", iex_len, *node ); return TRUE; } - - iex = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, iex_len ); - if (!iex) { FIXME( "stub: iex %p, node %lu\n", iex, *node ); return TRUE; } - - nts = set_ntstatus( NtQuerySystemInformationEx( SystemLogicalProcessorInformationEx, - &rel, sizeof(rel), iex, iex_len, &iex_len ) ); - if (!nts) - { - FIXME( "stub: nts %lu, node %lu\n", nts, *node ); - RtlFreeHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, iex ); - return TRUE; - }
- for ( i = 0; i < iex_len; ) - { - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ex = ( SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX * ) - ( (char *)iex + i ); - if (!ex->Size) break; - if (ex->Relationship == RelationNumaNode) - { - if (ex->NumaNode.NodeNumber > *node) - *node = ex->NumaNode.NodeNumber; - } - i += ex->Size; - } + if ( + !set_ntstatus( NtQuerySystemInformationEx( SystemNumaProximityNodeInformation, + NULL, 0, &ni, sizeof(ni), NULL )) + ) + return FALSE;
- RtlFreeHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, iex ); + *node = ni.HighestNodeNumber; return TRUE; }
From: wasertech danny@waser.tech
--- dlls/ntdll/unix/system.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index bf61f179849..2b5ea75480f 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -1428,6 +1428,7 @@ BOOL init_numa_info(FILE_NUMA_NODE_INFORMATION *info) }
info->NodeNumber = 0; + FIXME( "node affinity; using node %u.\n", info->NodeNumber ); return TRUE; }
A couple of notes:
- a bit orthogonal to the suggested changes themselves, but if the goal is to properly report CPU info on high core count machines, the major missing bit currently is processor group support (effectively limiting apps' seen and manageable CPU count to 64 on 64-bit arch).
- doing some complicated queries and shuffling functions around is probably not that useful until the NUMA info is properly queried in the ntdll Unix part. Without that, if the goal is just to have a (better) stub which will avoid some failures, it may as well hardcode the simple answer, which is de facto what Wine reports now. What is the problem with GetNumaHighestNodeNumber() returning 0 now that is solved by the changes, given that they will effectively yield the same result for now?
If going for implementing NUMA functionality for real, I'd probably suggest limiting the initial scope of this and starting from properly returning NUMA associations from NtQuerySystemInformation( SystemLogicalProcessorInformation[Ex] ), so that the existing GetLogicalProcessorInformation[Ex]() also returns actual data. Then again, I am not sure how useful that is in general until processor groups / full CPU count are supported.
Separately, I don't know if using LLMs in preparation of Wine MRs is allowed or not, but just asking it to do the job and sending it is obviously not going to work, LLMs are not ready for these sorts of problems.
If going for implementing NUMA functionality for real, I'd probably suggest to limit the initial scope of this and start from properly returning NUMA associations from NtQuerySystemInformation( SystemLogicalProcessorInformation[Ex] ), so also existing GetLogicalProcessorInformationEx returns actual data.
Scratch that, the numa nodes are actually queried and filled already. And probably GetNumaProcessorNode() / GetNumaHighestNodeNumber() can be easily implemented on top of GetLogicalProcessorInformationEx().
Just curious, what use did you find for FILE_NUMA_NODE_INFORMATION? Especially before VirtualAllocExNuma / MapViewOfFileExNuma are properly implemented taking into account the node parameter?
On Thu Sep 18 18:40:32 2025 +0000, Paul Gofman wrote:
If going for implementing NUMA functionality for real, I'd probably
suggest to limit the initial scope of this and start from properly returning NUMA associations from NtQuerySystemInformation( SystemLogicalProcessorInformation[Ex] ), so also existing GetLogicalProcessorInformationEx returns actual data. Scratch that, the numa nodes are actually queried and filled already. And probably GetNumaProcessorNode() / GetNumaHighestNodeNumber() can be easily implemented on top of GetLogicalProcessorInformationEx().
There is SystemNumaProcessorMap, I think that's a more likely candidate.
... if the goal is to properly report CPU info on high core count machines the major missing bit currently is processor group support ...
Yeah I saw that but I only have 12 cores / 24 threads and 2 nodes on 1 group... I am not even affected. It's just not my focus here. I only care about solving the memory access violation that arises when you run a simple app like a game that tries to grab the first node to spawn concurrent work.
... the numa nodes are actually queried and filled already. And probably GetNumaProcessorNode() / GetNumaHighestNodeNumber() can be easily implemented on top of GetLogicalProcessorInformationEx().
Yes if you look at commit df3dea2cd1dff29eaac8a5ec04176933f079b363 that's how I did it at first, but
1. it's still a lot of logic that you would have to redo every time you just need an accurate node count,
2. I was asked multiple times to do this in ntdll,
3. it would be very useful to be able to reuse that kind of logic where such a MAV can arise (like in `GetNumaProcessorNode`)
so I pushed it deeper into the "kernel".
There is SystemNumaProcessorMap, I think that's a more likely candidate.
I really wasn't sure. I can move it.
Just curious, what use did you find for FILE_NUMA_NODE_INFORMATION? Especially before VirtualAllocExNuma / MapViewOfFileExNuma are properly implemented taking into account the node parameter?
That's a good point. It's because I don't really care about moving the memory between nodes. Not now at least. Maybe one day I'll actually need this ability but for now, I just need a place to put this info for quick access so that I can avoid any memory access violation that can arise when you report only 1 logical node and the app tries to initialize its memory as if it had only 1 node when it has _de facto_ 2 physical ones. For now I don't need `GetNumaProcessorNode`, only `GetNumaHighestNodeNumber`.
I'll take some time to understand the failing tests and address them (if I can), remove the sync commit that comes from gitlab (do it myself) and migrate to `SystemNumaProcessorMap` instead of `SystemNumaProximityNodeInformation`.
In the meantime, if you have more feedback, please keep it coming.