From: wasertech <danny@waser.tech>
--- dlls/kernel32/process.c | 134 +++++++++++++++++++++---- dlls/kernel32/tests/process.c | 63 ++++++++++++ dlls/kernelbase/memory.c | 180 +++++++++++++++++++++++++++++++--- dlls/kernelbase/thread.c | 25 ++++- 4 files changed, 373 insertions(+), 29 deletions(-)
diff --git a/dlls/kernel32/process.c b/dlls/kernel32/process.c index 23ec8ce7601..61cbc6a60f4 100644 --- a/dlls/kernel32/process.c +++ b/dlls/kernel32/process.c @@ -40,6 +40,11 @@
WINE_DEFAULT_DEBUG_CHANNEL(process);
+/* Prototypes of the NUMA entry points that the kernel32 wrappers in this file call. NOTE(review): presumably winbase.h already declares these - confirm whether the local externs are needed. */ +extern BOOL WINAPI GetNumaHighestNodeNumber( ULONG *node ); +extern BOOL WINAPI GetNumaNodeProcessorMaskEx( USHORT node, GROUP_AFFINITY *mask ); +extern BOOL WINAPI GetNumaProximityNodeEx( ULONG proximity_id, USHORT *node ); + static const struct _KUSER_SHARED_DATA *user_shared_data = (struct _KUSER_SHARED_DATA *)0x7ffe0000;
typedef struct
@@ -763,9 +768,24 @@ BOOL WINAPI GetFirmwareType(FIRMWARE_TYPE *type)
  */
 BOOL WINAPI GetNumaNodeProcessorMask(UCHAR node, PULONGLONG mask)
 {
-    FIXME("(%c %p): stub\n", node, mask);
-    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
-    return FALSE;
+    GROUP_AFFINITY group_mask;
+
+    TRACE("GetNumaNodeProcessorMask(node=%u, mask=%p)\n", node, mask);
+
+    /* Without an output buffer there is nothing to fill in. */
+    if (mask == NULL)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+
+    /* The Ex variant does the lookup; when it fails its last error is kept. */
+    if (GetNumaNodeProcessorMaskEx(node, &group_mask))
+    {
+        *mask = group_mask.Mask;
+        return TRUE;
+    }
+    return FALSE;
 }
/**********************************************************************
@@ -773,9 +785,26 @@ BOOL WINAPI GetNumaNodeProcessorMask(UCHAR node, PULONGLONG mask)
  */
 BOOL WINAPI GetNumaAvailableMemoryNode(UCHAR node, PULONGLONG available_bytes)
 {
-    FIXME("(%c %p): stub\n", node, available_bytes);
-    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
-    return FALSE;
+    MEMORYSTATUSEX mem_status;
+    ULONG last_node;
+
+    TRACE("GetNumaAvailableMemoryNode(node=%u, avail=%p)\n", node, available_bytes);
+
+    if (available_bytes == NULL) goto invalid_parameter;
+    if (!GetNumaHighestNodeNumber(&last_node)) return FALSE;
+    if (node > last_node) goto invalid_parameter;
+
+    mem_status.dwLength = sizeof(mem_status);
+    if (!GlobalMemoryStatusEx(&mem_status)) return FALSE;
+
+    /* No per-node accounting is done here: every node is reported as holding
+     * an equal share of the free physical memory. */
+    *available_bytes = mem_status.ullAvailPhys / (last_node + 1);
+    return TRUE;
+
+invalid_parameter:
+    SetLastError(ERROR_INVALID_PARAMETER);
+    return FALSE;
 }
/**********************************************************************
@@ -783,9 +811,16 @@ BOOL WINAPI GetNumaAvailableMemoryNode(UCHAR node, PULONGLONG available_bytes)
  */
 BOOL WINAPI GetNumaAvailableMemoryNodeEx(USHORT node, PULONGLONG available_bytes)
 {
-    FIXME("(%hu %p): stub\n", node, available_bytes);
-    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
-    return FALSE;
+    /* The 8-bit variant can only name nodes 0-255.  Reject anything larger
+     * up front: narrowing it with a cast would alias it onto a low node
+     * (256 -> 0) and report success for a node that does not exist. */
+    if (node > 0xFF)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+    /* Same approximation as the 8-bit version */
+    return GetNumaAvailableMemoryNode((UCHAR)node, available_bytes);
 }
/***********************************************************************
@@ -793,14 +820,39 @@ BOOL WINAPI GetNumaAvailableMemoryNodeEx(USHORT node, PULONGLONG available_bytes
  */
 BOOL WINAPI GetNumaProcessorNode(UCHAR processor, PUCHAR node)
 {
-    TRACE("(%d, %p)\n", processor, node);
-
-    if (processor < system_info.NumberOfProcessors)
-    {
-        *node = 0;
-        return TRUE;
-    }
-
+    ULONG highest, n;
+    GROUP_AFFINITY affinity;
+
+    TRACE("GetNumaProcessorNode(proc=%u, node=%p)\n", processor, node);
+
+    if (!node)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+
+    /* Only group 0 is modelled, so a usable index must also fit in the
+     * affinity mask; shifting by the mask width or more is undefined. */
+    if (processor < system_info.NumberOfProcessors && processor < 8 * sizeof(affinity.Mask))
+    {
+        /* No topology data, or a single node: everything lives on node 0 */
+        if (!GetNumaHighestNodeNumber(&highest) || !highest)
+        {
+            *node = 0;
+            return TRUE;
+        }
+        for (n = 0; n <= highest; ++n)
+        {
+            if (!GetNumaNodeProcessorMaskEx((USHORT)n, &affinity)) continue;
+            if (affinity.Mask & ((ULONGLONG)1 << processor))
+            {
+                *node = (UCHAR)n;
+                return TRUE;
+            }
+        }
+    }
+
+    /* Unknown processor: report 0xFF as the node and fail */
     *node = 0xFF;
     SetLastError(ERROR_INVALID_PARAMETER);
     return FALSE;
@@ -811,17 +863,75 @@ BOOL WINAPI GetNumaProcessorNode(UCHAR processor, PUCHAR node)
  */
 BOOL WINAPI GetNumaProcessorNodeEx(PPROCESSOR_NUMBER processor, PUSHORT node_number)
 {
-    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
+    ULONG highest, n;
+    GROUP_AFFINITY affinity;
+
+    TRACE("GetNumaProcessorNodeEx(proc=%p, node_number=%p)\n", processor, node_number);
+
+    if (!processor || !node_number)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+
+    /* Callers are documented to receive MAXUSHORT when the processor does
+     * not exist, so preset it for every failure path below. */
+    *node_number = 0xFFFF;
+
+    /* Current implementation: single group supported.  The index must also
+     * fit in the affinity mask, otherwise the shift below is undefined. */
+    if (processor->Group != 0 ||
+        processor->Number >= system_info.NumberOfProcessors ||
+        processor->Number >= 8 * sizeof(affinity.Mask))
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+
+    if (!GetNumaHighestNodeNumber(&highest)) return FALSE;
+    if (highest == 0)
+    {
+        *node_number = 0; /* no-NUMA system */
+        return TRUE;
+    }
+    for (n = 0; n <= highest; ++n)
+    {
+        if (GetNumaNodeProcessorMaskEx((USHORT)n, &affinity) &&
+            (affinity.Mask & ((ULONGLONG)1 << processor->Number)))
+        {
+            *node_number = (USHORT)n;
+            return TRUE;
+        }
+    }
+
+    SetLastError(ERROR_INVALID_PARAMETER);
     return FALSE;
 }
 
 /***********************************************************************
  * GetNumaProximityNode (KERNEL32.@)
  */
 BOOL WINAPI GetNumaProximityNode(ULONG proximity_id, PUCHAR node_number)
 {
-    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
-    return FALSE;
+    USHORT node16;
+
+    TRACE("GetNumaProximityNode(proximity=%lu, node_number=%p)\n", proximity_id, node_number);
+
+    if (!node_number)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+    if (!GetNumaProximityNodeEx(proximity_id, &node16)) return FALSE;
+
+    /* The legacy interface can only report node numbers that fit in a byte */
+    if (node16 > 0xFF)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+    *node_number = (UCHAR)node16;
+    return TRUE;
 }
/********************************************************************** diff --git a/dlls/kernel32/tests/process.c b/dlls/kernel32/tests/process.c index 7984f3d6fb4..4632ea309b5 100644 --- a/dlls/kernel32/tests/process.c +++ b/dlls/kernel32/tests/process.c @@ -78,7 +78,10 @@ static BOOL (WINAPI *pTerminateJobObject)(HANDLE job, UINT exit_code); static BOOL (WINAPI *pQueryInformationJobObject)(HANDLE job, JOBOBJECTINFOCLASS class, LPVOID info, DWORD len, LPDWORD ret_len); static BOOL (WINAPI *pSetInformationJobObject)(HANDLE job, JOBOBJECTINFOCLASS class, LPVOID info, DWORD len); static HANDLE (WINAPI *pCreateIoCompletionPort)(HANDLE file, HANDLE existing_port, ULONG_PTR key, DWORD threads); +static BOOL (WINAPI *pGetNumaHighestNodeNumber)(ULONG *); static BOOL (WINAPI *pGetNumaProcessorNode)(UCHAR, PUCHAR); +static BOOL (WINAPI *pGetNumaNodeProcessorMaskEx)(USHORT, GROUP_AFFINITY *); +static BOOL (WINAPI *pGetNumaProximityNodeEx)(ULONG, USHORT *); static NTSTATUS (WINAPI *pNtQueryInformationProcess)(HANDLE, PROCESSINFOCLASS, PVOID, ULONG, PULONG); static NTSTATUS (WINAPI *pNtQueryInformationThread)(HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG); static NTSTATUS (WINAPI *pNtQuerySystemInformationEx)(SYSTEM_INFORMATION_CLASS, void*, ULONG, void*, ULONG, ULONG*); @@ -268,7 +271,10 @@ static BOOL init(void) pQueryInformationJobObject = (void *)GetProcAddress(hkernel32, "QueryInformationJobObject"); pSetInformationJobObject = (void *)GetProcAddress(hkernel32, "SetInformationJobObject"); pCreateIoCompletionPort = (void *)GetProcAddress(hkernel32, "CreateIoCompletionPort"); + pGetNumaHighestNodeNumber = (void *)GetProcAddress(hkernel32, "GetNumaHighestNodeNumber"); pGetNumaProcessorNode = (void *)GetProcAddress(hkernel32, "GetNumaProcessorNode"); + pGetNumaNodeProcessorMaskEx = (void *)GetProcAddress(hkernel32, "GetNumaNodeProcessorMaskEx"); + pGetNumaProximityNodeEx = (void *)GetProcAddress(hkernel32, "GetNumaProximityNodeEx"); pWTSGetActiveConsoleSessionId = (void 
*)GetProcAddress(hkernel32, "WTSGetActiveConsoleSessionId"); pCreateToolhelp32Snapshot = (void *)GetProcAddress(hkernel32, "CreateToolhelp32Snapshot"); pProcess32First = (void *)GetProcAddress(hkernel32, "Process32First"); @@ -4087,6 +4093,62 @@ static void test_GetNumaProcessorNode(void) } }
+static void test_NumaBasic(void)
+{
+    GROUP_AFFINITY node_mask;
+    ULONG top_node = 0xdeadbeef;
+    USHORT mapped;
+    BOOL succeeded;
+
+    /* Every section below is optional; bail out only if none of them can run. */
+    if (!(pGetNumaHighestNodeNumber || pGetNumaNodeProcessorMaskEx || pGetNumaProximityNodeEx))
+    {
+        win_skip("NUMA extended functions are missing\n");
+        return;
+    }
+
+    if (pGetNumaHighestNodeNumber)
+    {
+        SetLastError(0xdeadbeef);
+        succeeded = pGetNumaHighestNodeNumber(&top_node);
+        ok(succeeded || GetLastError() == ERROR_INVALID_FUNCTION,
+           "GetNumaHighestNodeNumber failed: %lu\n", GetLastError());
+        if (succeeded) trace("Highest NUMA node: %lu\n", top_node);
+    }
+
+    if (pGetNumaNodeProcessorMaskEx)
+    {
+        /* Node 0: either success or one of the accepted error codes. */
+        memset(&node_mask, 0, sizeof(node_mask));
+        SetLastError(0xdeadbeef);
+        succeeded = pGetNumaNodeProcessorMaskEx(0, &node_mask);
+        ok(succeeded || GetLastError() == ERROR_INVALID_PARAMETER || GetLastError() == ERROR_INVALID_FUNCTION,
+           "GetNumaNodeProcessorMaskEx(0) unexpected failure %lu\n", GetLastError());
+        if (succeeded)
+            trace("Node0: Group=%u Mask=%I64x\n", node_mask.Group, (unsigned long long)node_mask.Mask);
+
+        /* Node 1 may not exist, so its result is only traced. */
+        memset(&node_mask, 0, sizeof(node_mask));
+        if (pGetNumaNodeProcessorMaskEx(1, &node_mask))
+            trace("Node1: Group=%u Mask=%I64x\n", node_mask.Group, (unsigned long long)node_mask.Mask);
+    }
+
+    if (pGetNumaProximityNodeEx)
+    {
+        mapped = 0xdead;
+        SetLastError(0xdeadbeef);
+        succeeded = pGetNumaProximityNodeEx(0, &mapped);
+        ok(succeeded || GetLastError() == ERROR_INVALID_PARAMETER || GetLastError() == ERROR_INVALID_FUNCTION,
+           "GetNumaProximityNodeEx(0) failed %lu\n", GetLastError());
+        if (succeeded) trace("Proximity 0 -> node %u\n", mapped);
+
+        /* Proximity id 1 is likewise only traced. */
+        mapped = 0xdead;
+        if (pGetNumaProximityNodeEx(1, &mapped))
+            trace("Proximity 1 -> node %u\n", mapped);
+    }
+}
+
 static void test_session_info(void)
 {
     DWORD session_id, active_session;
@@ -5684,6 +5746,7 @@ START_TEST(process)
     test_DuplicateHandle();
     test_StdHandleInheritance();
     test_GetNumaProcessorNode();
+    test_NumaBasic();
     test_session_info();
     test_GetLogicalProcessorInformationEx();
     test_GetSystemCpuSetInformation();
diff --git a/dlls/kernelbase/memory.c b/dlls/kernelbase/memory.c
index c0a3d65d341..aa83ce85b24 100644
--- a/dlls/kernelbase/memory.c
+++ b/dlls/kernelbase/memory.c
@@ -40,7 +40,7 @@
 WINE_DEFAULT_DEBUG_CHANNEL(heap);
 WINE_DECLARE_DEBUG_CHANNEL(virtual);
 WINE_DECLARE_DEBUG_CHANNEL(globalmem);
-
+WINE_DECLARE_DEBUG_CHANNEL(numa);
static CRITICAL_SECTION memstatus_section;
 static CRITICAL_SECTION_DEBUG critsect_debug =
@@ -1476,6 +1476,180 @@ BOOL WINAPI DECLSPEC_HOTPATCH MapUserPhysicalPages( void *addr, ULONG_PTR page_c
  * NUMA functions
  ***********************************************************************/
 
+/* NUMA support using Windows logical processor information */
+#define NUMA_MAX_NODES 64 /* size of the node table; higher node numbers are ignored */
+
+static ULONG numa_highest_node_number = 0;
+static BOOL numa_initialized = FALSE;
+static RTL_CRITICAL_SECTION numa_cs;
+static RTL_CRITICAL_SECTION_DEBUG numa_cs_debug =
+{
+    0, 0, &numa_cs,
+    { &numa_cs_debug.ProcessLocksList, &numa_cs_debug.ProcessLocksList },
+      0, 0, { (DWORD_PTR)(__FILE__ ": numa_cs") }
+};
+static RTL_CRITICAL_SECTION numa_cs = { &numa_cs_debug, -1, 0, 0, 0, 0 };
+
+/* CPU mask of each NUMA node, filled in once by initialize_numa_info() */
+static struct numa_node_info
+{
+    ULONG_PTR cpu_mask;
+    BOOL valid;
+} numa_nodes[NUMA_MAX_NODES];
+
+/* NUMA runtime tweak flags */
+static int numa_env_checked = 0;
+static BOOL numa_force_single = FALSE; /* WINE_NUMA_FORCE_SINGLE=1 -> force single node */
+static BOOL numa_contig = FALSE;       /* WINE_NUMA_CONTIG=1 -> remap contiguous masks */
+
+/* Number of bits set in v */
+static unsigned int popcount_ulongptr( ULONG_PTR v )
+{
+#if defined(__GNUC__)
+    return __builtin_popcountll( (unsigned long long)v );
+#else
+    unsigned int c = 0;
+    while (v) { v &= (v - 1); c++; }
+    return c;
+#endif
+}
+
+/* Mask with the lowest 'count' bits set.  The full-width case is handled
+ * separately because shifting by the width of the type is undefined. */
+static ULONG_PTR numa_low_bits( unsigned int count )
+{
+    if (count >= sizeof(ULONG_PTR) * 8) return ~(ULONG_PTR)0;
+    return ((ULONG_PTR)1 << count) - 1;
+}
+
+/* TRUE if the environment variable is set to anything other than "0" */
+static BOOL numa_env_enabled( const char *name )
+{
+    char buffer[16];
+    DWORD len = GetEnvironmentVariableA( name, buffer, sizeof(buffer) );
+
+    return len && !(len == 1 && buffer[0] == '0');
+}
+
+/* Fill numa_nodes[] and numa_highest_node_number on first use.
+ * Every caller in this file holds numa_cs around the call. */
+static void initialize_numa_info(void)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *info = NULL;
+    DWORD len = 0, i;
+    ULONG max_node = 0;
+
+    if (numa_initialized) return;
+
+    if (!numa_env_checked)
+    {
+        numa_env_checked = 1;
+        numa_force_single = numa_env_enabled( "WINE_NUMA_FORCE_SINGLE" );
+        numa_contig = numa_env_enabled( "WINE_NUMA_CONTIG" );
+        TRACE_(numa)("NUMA env: FORCE_SINGLE=%d CONTIG=%d\n", numa_force_single, numa_contig);
+    }
+
+    memset(numa_nodes, 0, sizeof(numa_nodes));
+
+    /* Query logical processor information to get NUMA topology */
+    if (!numa_force_single &&
+        !GetLogicalProcessorInformation(NULL, &len) && GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+    {
+        info = HeapAlloc(GetProcessHeap(), 0, len);
+        if (info && GetLogicalProcessorInformation(info, &len))
+        {
+            DWORD count = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+
+            for (i = 0; i < count; i++)
+            {
+                ULONG node_number;
+
+                if (info[i].Relationship != RelationNumaNode) continue;
+                node_number = info[i].NumaNode.NodeNumber;
+                if (node_number >= NUMA_MAX_NODES) continue;
+                numa_nodes[node_number].cpu_mask = info[i].ProcessorMask;
+                numa_nodes[node_number].valid = TRUE;
+                if (node_number > max_node) max_node = node_number;
+                TRACE_(numa)("NUMA raw: node=%lu mask=0x%llx\n",
+                             (unsigned long)node_number, (unsigned long long)info[i].ProcessorMask);
+            }
+        }
+        HeapFree(GetProcessHeap(), 0, info);
+    }
+
+    /* Fallback single node if requested or none discovered */
+    if (numa_force_single || (max_node == 0 && !numa_nodes[0].valid))
+    {
+        SYSTEM_INFO si;
+
+        GetSystemInfo(&si);
+        numa_nodes[0].cpu_mask = numa_low_bits( si.dwNumberOfProcessors );
+        numa_nodes[0].valid = TRUE;
+        max_node = 0;
+        TRACE_(numa)("NUMA fallback single: mask=0x%llx procs=%u\n",
+                     (unsigned long long)numa_nodes[0].cpu_mask,
+                     (unsigned)popcount_ulongptr(numa_nodes[0].cpu_mask));
+    }
+    else if (numa_contig && max_node > 0)
+    {
+        /* Remap each node to a contiguous block of bits in ascending order */
+        ULONG_PTR new_masks[NUMA_MAX_NODES] = {0};
+        unsigned int bit_offset = 0;
+        BOOL ok = TRUE;
+
+        for (i = 0; i <= max_node; i++)
+        {
+            unsigned int cnt;
+
+            if (!numa_nodes[i].valid) continue;
+            cnt = popcount_ulongptr(numa_nodes[i].cpu_mask);
+            if (!cnt || bit_offset + cnt > sizeof(ULONG_PTR) * 8) { ok = FALSE; break; }
+            /* cnt may equal the mask width here, hence numa_low_bits() */
+            new_masks[i] = numa_low_bits( cnt ) << bit_offset;
+            TRACE_(numa)("NUMA remap: node=%lu raw=0x%llx cnt=%u -> contig=0x%llx base=%u\n",
+                         (unsigned long)i, (unsigned long long)numa_nodes[i].cpu_mask, cnt,
+                         (unsigned long long)new_masks[i], bit_offset);
+            bit_offset += cnt;
+        }
+        if (ok)
+        {
+            for (i = 0; i <= max_node; i++)
+                if (numa_nodes[i].valid && new_masks[i]) numa_nodes[i].cpu_mask = new_masks[i];
+        }
+        else TRACE_(numa)("NUMA remap: aborted (ok=%d)\n", ok);
+    }
+
+    numa_highest_node_number = max_node;
+    numa_initialized = TRUE;
+    TRACE_(numa)("NUMA init done: highest_node=%lu\n", (unsigned long)numa_highest_node_number);
+}
+
+/* Copy the processor mask of 'node' into *mask; only group 0 is reported.
+ * Fails with ERROR_INVALID_PARAMETER for nodes that were not discovered. */
+static BOOL get_numa_node_cpu_mask( USHORT node, GROUP_AFFINITY *mask )
+{
+    ULONG_PTR cpu_mask = 0;
+    BOOL valid;
+
+    RtlEnterCriticalSection(&numa_cs);
+    if (!numa_initialized) initialize_numa_info();
+    /* Read the table entry while the lock is still held */
+    valid = node < NUMA_MAX_NODES && numa_nodes[node].valid;
+    if (valid) cpu_mask = numa_nodes[node].cpu_mask;
+    RtlLeaveCriticalSection(&numa_cs);
+
+    if (!valid)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+    memset(mask, 0, sizeof(*mask));
+    mask->Group = 0; /* Single processor group for now */
+    mask->Mask = cpu_mask;
+    TRACE_(numa)("get_numa_node_cpu_mask: node=%u Group=%hu Mask=0x%llx\n",
+                 node, mask->Group, (unsigned long long)mask->Mask);
+    return TRUE;
+}
/***********************************************************************
  * AllocateUserPhysicalPagesNuma (kernelbase.@)
@@ -1580,10 +1708,22 @@ BOOL WINAPI SetProcessDefaultCpuSets(HANDLE process, const ULONG *cpu_set_ids, U
 /**********************************************************************
  * GetNumaHighestNodeNumber (kernelbase.@)
  */
 BOOL WINAPI DECLSPEC_HOTPATCH GetNumaHighestNodeNumber( ULONG *node )
 {
-    FIXME( "semi-stub: %p\n", node );
-    *node = 0;
+    TRACE("(%p)\n", node);
+
+    if (!node)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+
+    /* The node table is built on first use; numa_cs covers both that
+     * initialisation and the read of the cached value. */
+    RtlEnterCriticalSection(&numa_cs);
+    if (!numa_initialized) initialize_numa_info();
+    *node = numa_highest_node_number;
+    RtlLeaveCriticalSection(&numa_cs);
     return TRUE;
 }
@@ -1591,20 +1728,49 @@ BOOL WINAPI DECLSPEC_HOTPATCH GetNumaHighestNodeNumber( ULONG *node )
 /**********************************************************************
  * GetNumaNodeProcessorMaskEx (kernelbase.@)
  */
 BOOL WINAPI DECLSPEC_HOTPATCH GetNumaNodeProcessorMaskEx( USHORT node, GROUP_AFFINITY *mask )
 {
-    FIXME( "stub: %hu %p\n", node, mask );
-    SetLastError( ERROR_CALL_NOT_IMPLEMENTED );
-    return FALSE;
+    TRACE("(%hu, %p)\n", node, mask);
+
+    /* The node number is narrowed to 8 bits for the lookup below, so values
+     * above 0xFF must be rejected here; otherwise the cast would alias them
+     * onto a low node (256 -> 0) and return that node's mask. */
+    if (!mask || node > 0xFF)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+    return get_numa_node_cpu_mask((UCHAR)node, mask);
 }
 
 
 /***********************************************************************
  * GetNumaProximityNodeEx (kernelbase.@)
  */
 BOOL WINAPI DECLSPEC_HOTPATCH GetNumaProximityNodeEx( ULONG proximity_id, USHORT *node )
 {
-    SetLastError( ERROR_CALL_NOT_IMPLEMENTED );
+    BOOL found;
+
+    TRACE("(%lu, %p)\n", proximity_id, node);
+
+    if (!node)
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+
+    RtlEnterCriticalSection(&numa_cs);
+    if (!numa_initialized) initialize_numa_info();
+    /* For simplicity, assume proximity_id maps directly to node number */
+    found = proximity_id <= numa_highest_node_number;
+    RtlLeaveCriticalSection(&numa_cs);
+
+    if (found)
+    {
+        *node = (USHORT)proximity_id;
+        return TRUE;
+    }
+    SetLastError(ERROR_INVALID_PARAMETER);
     return FALSE;
 }
diff --git a/dlls/kernelbase/thread.c b/dlls/kernelbase/thread.c
index dbde4dcfaf3..e69ce948af1 100644
--- a/dlls/kernelbase/thread.c
+++ b/dlls/kernelbase/thread.c
@@ -498,10 +498,27 @@ BOOL WINAPI SetThreadErrorMode( DWORD mode, DWORD *old )
 /***********************************************************************
  * SetThreadGroupAffinity (kernelbase.@)
  */
 BOOL WINAPI DECLSPEC_HOTPATCH SetThreadGroupAffinity( HANDLE thread, const GROUP_AFFINITY *new,
                                                       GROUP_AFFINITY *old )
 {
     if (old && !GetThreadGroupAffinity( thread, old )) return FALSE;
+    /* Trace the request using only data that is already at hand.  Querying
+     * the current affinity just for logging would cost an extra call and,
+     * if it failed, change the last error of an otherwise successful call. */
+    if (new)
+    {
+        TRACE("SetThreadGroupAffinity: thread=%p new->Group=%hu new->Mask=0x%llx\n",
+              thread, new->Group, (unsigned long long)new->Mask);
+    }
+    else
+    {
+        TRACE("SetThreadGroupAffinity: thread=%p new=NULL\n", thread);
+    }
+    if (old)
+    {
+        TRACE("SetThreadGroupAffinity: previous old->Group=%hu old->Mask=0x%llx\n",
+              old->Group, (unsigned long long)old->Mask);
+    }
     return set_ntstatus( NtSetInformationThread( thread, ThreadGroupInformation, new, sizeof(*new) ));
 }