More details here: https://devblogs.microsoft.com/oldnewthing/20181206-00/?p=100415
However it does not mention that `PAGE_NOACCESS` and `PAGE_READONLY` still result in an error on Windows, which is properly implemented in this MR.
Only `WriteProcessMemory` offers this "service", `NtWriteVirtualMemory` will fail on non-writeable and executable regions (and already does so, except for the the mach server backend, which needs https://gitlab.winehq.org/wine/wine/-/merge_requests/4826 to also behave correctly here).
-- v3: kernelbase: Flush instruction cache when necessary in WriteProcessMemory. kernelbase: Allow WriteProcessMemory to succeed on PAGE_EXECUTE_READ.
From: Marc-Aurel Zent mzent@codeweavers.com
--- dlls/kernel32/tests/virtual.c | 13 ++++++++++++ dlls/kernelbase/memory.c | 38 ++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/dlls/kernel32/tests/virtual.c b/dlls/kernel32/tests/virtual.c index a023104ad88..3c362b60ae3 100644 --- a/dlls/kernel32/tests/virtual.c +++ b/dlls/kernel32/tests/virtual.c @@ -83,6 +83,8 @@ static void test_VirtualAllocEx(void) DWORD old_prot; MEMORY_BASIC_INFORMATION info; HANDLE hProcess; + NTSTATUS status; + NTSTATUS (NTAPI *pNtWriteVirtualMemory)(HANDLE, void *, const void *, SIZE_T, SIZE_T *);
/* Same process */ addr1 = VirtualAllocEx(GetCurrentProcess(), NULL, alloc_size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); @@ -122,6 +124,17 @@ static void test_VirtualAllocEx(void) b = ReadProcessMemory(hProcess, addr1, src, 0, &bytes_read); ok(b && !bytes_read, "read failed: %lu\n", GetLastError());
+ /* test write to read-only execute region */ + addr2 = VirtualAllocEx(hProcess, NULL, alloc_size, MEM_COMMIT, + PAGE_EXECUTE_READ); + ok(addr2 != NULL, "VirtualAllocEx error %lu\n", GetLastError()); + b = WriteProcessMemory(hProcess, addr2, src, alloc_size, &bytes_written); + ok(b, "WriteProcessMemory should succeed on a PAGE_EXECUTE_READ region. Failed error: %lu\n", GetLastError()); + pNtWriteVirtualMemory = (NTSTATUS (NTAPI *)(HANDLE, void *, const void *, SIZE_T, SIZE_T *)) + GetProcAddress(GetModuleHandleW(L"ntdll"), "NtWriteVirtualMemory"); + status = pNtWriteVirtualMemory(hProcess, addr2, src, alloc_size, &bytes_written); + ok(!NT_SUCCESS(status), "Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: %lu\n", status); + /* test invalid source buffers */
b = VirtualProtect( src + 0x2000, 0x2000, PAGE_NOACCESS, &old_prot ); diff --git a/dlls/kernelbase/memory.c b/dlls/kernelbase/memory.c index 4f4bba9a13b..293a3f93dda 100644 --- a/dlls/kernelbase/memory.c +++ b/dlls/kernelbase/memory.c @@ -537,7 +537,43 @@ BOOL WINAPI DECLSPEC_HOTPATCH VirtualUnlock( void *addr, SIZE_T size ) BOOL WINAPI DECLSPEC_HOTPATCH WriteProcessMemory( HANDLE process, void *addr, const void *buffer, SIZE_T size, SIZE_T *bytes_written ) { - return set_ntstatus( NtWriteVirtualMemory( process, addr, buffer, size, bytes_written )); + NTSTATUS status, protect_status; + MEMORY_BASIC_INFORMATION mbi; + SIZE_T res_len, region_size = size; + ULONG old_prot; + void *base_addr = addr; + + status = NtQueryVirtualMemory( process, addr, MemoryBasicInformation, &mbi, sizeof(mbi), &res_len ); + + if (!NT_SUCCESS(status)) return set_ntstatus( status ); + + if ((mbi.Protect & PAGE_NOACCESS) == PAGE_NOACCESS || + (mbi.Protect & PAGE_READONLY) == PAGE_READONLY) + return set_ntstatus( STATUS_ACCESS_VIOLATION ); + + if ((mbi.Protect & PAGE_READWRITE) == PAGE_READWRITE || + (mbi.Protect & PAGE_WRITECOPY) == PAGE_WRITECOPY) + return set_ntstatus( NtWriteVirtualMemory( process, addr, buffer, size, bytes_written ) ); + + if ((mbi.Protect & PAGE_EXECUTE_READWRITE) == PAGE_EXECUTE_READWRITE || + (mbi.Protect & PAGE_EXECUTE_WRITECOPY) == PAGE_EXECUTE_WRITECOPY) + { + status = NtWriteVirtualMemory( process, addr, buffer, size, bytes_written ); + return set_ntstatus( status ); + } + + /* In the non-writeable code region case, Windows changes the protection to facilitate the write + * and then changes it back. */ + + protect_status = NtProtectVirtualMemory( process, &base_addr, ®ion_size, PAGE_EXECUTE_READWRITE, &old_prot ); + if (!NT_SUCCESS(protect_status)) return set_ntstatus( protect_status ); + + status = NtWriteVirtualMemory( process, addr, buffer, size, bytes_written ); + + protect_status = NtProtectVirtualMemory( process, &base_addr, ®ion_size, old_prot, &old_prot ); + if (!NT_SUCCESS(protect_status)) return set_ntstatus( protect_status ); + + return set_ntstatus( status ); }
From: Marc-Aurel Zent mzent@codeweavers.com
--- dlls/kernelbase/memory.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/dlls/kernelbase/memory.c b/dlls/kernelbase/memory.c index 293a3f93dda..62315d37773 100644 --- a/dlls/kernelbase/memory.c +++ b/dlls/kernelbase/memory.c @@ -559,6 +559,7 @@ BOOL WINAPI DECLSPEC_HOTPATCH WriteProcessMemory( HANDLE process, void *addr, co (mbi.Protect & PAGE_EXECUTE_WRITECOPY) == PAGE_EXECUTE_WRITECOPY) { status = NtWriteVirtualMemory( process, addr, buffer, size, bytes_written ); + NtFlushInstructionCache( process, addr, size ); return set_ntstatus( status ); }
@@ -573,6 +574,7 @@ BOOL WINAPI DECLSPEC_HOTPATCH WriteProcessMemory( HANDLE process, void *addr, co protect_status = NtProtectVirtualMemory( process, &base_addr, ®ion_size, old_prot, &old_prot ); if (!NT_SUCCESS(protect_status)) return set_ntstatus( protect_status );
+ NtFlushInstructionCache( process, addr, size ); return set_ntstatus( status ); }
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=143680
Your paranoid android.
=== debian11 (32 bit report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11 (32 bit ar:MA report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11 (32 bit de report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11 (32 bit fr report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11 (32 bit he:IL report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11 (32 bit hi:IN report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11 (32 bit ja:JP report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11 (32 bit zh:CN report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11b (32 bit WoW report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
=== debian11b (64 bit WoW report) ===
kernel32: virtual.c:136: Test failed: Expected NtWriteVirtualMemory to fail on PAGE_EXECUTE_READ, but got status: 0
PAGE_READWRITE was a mistake there on my part, should be fixed now.
I added another test to show the difference to NtWriteVirtualMemory as well.
Concerning spanning multiple regions, it also behaves now the same as it does on Windows (first memory region determines if any changes to the protection are made), e.g.:
* Page 1 PAGE_READWRITE, Page 2 PAGE_READONLY: fails * Page 1 PAGE_READONLY, Page 2 PAGE_EXECUTE_READWRITE: fails * Page 1 PAGE_EXECUTE_READWRITE, Page 2 PAGE_READONLY: fails * Page 1 PAGE_EXECUTE_READ, Page 2 PAGE_READONLY: succeeds * Page 1 PAGE_EXECUTE_READWRITE, Page 2 PAGE_READWRITE: succeeds
Not too sure if it is also worth adding tests for some of these (and comparing them to NtWriteVirtualMemory as well).
No idea about the COM test failures here, but they are unrelated.
Does `WriteProcessMemory` actually implicitly flush icache on Windows? Note that Wine currently does not implement cross-process icache invalidation.
It seems like it does (although only tested intra-process arm64), made some tests here https://gitlab.winehq.org/wine/wine/-/merge_requests/5222#note_63372
I assume cross-process icache invalidation would need to be implemented via APC by the looks of it.
A bit more on optimizing (or, rather, not regressing) performance of the "normal" case, did you consider attempting to write first and then query protection only if write failed with the status suggesting missing memory write access? Also, the new behaviour still rejects PAGE_READONLY, PAGE_NOACCESS. It seems to me that only PAGE_EXECUTE_READ (and maybe PAGE_EXECUTE in theory) are left, maybe it would be clearer to check for allowed cases instead?
I can invert the logic there to handle the cases which require a protection change first (or alternatively assert them together with the comment).
Am still a bit hesitant to not querying first, at least the performance measurements seem to indicate a noticeable difference between WriteProcessMemory and NtWriteVirtualMemory on Windows in the “normal” case (this could be as well due to Windows unconditionally flushing the instruction cache though).
It would also remove the optimization of knowing the protection beforehand to skip the flush in non-executable regions.
NtWriteVirtualMemory: returns 0.
How is this possible?
Am still a bit hesitant to not querying first, at least the performance measurements seem to indicate a noticeable difference between WriteProcessMemory and NtWriteVirtualMemory on Windows in the “normal” case (this could be as well due to Windows unconditionally flushing the instruction cache though).
Are you saying that it is slower on Windows? Is that on ARM only probably? Can't imagine that being quicker on Linux / x86, with two server call roundtrips.
On Tue Mar 5 16:09:41 2024 +0000, Paul Gofman wrote:
Am still a bit hesitant to not querying first, at least the
performance measurements seem to indicate a noticeable difference between WriteProcessMemory and NtWriteVirtualMemory on Windows in the “normal” case (this could be as well due to Windows unconditionally flushing the instruction cache though). Are you saying that it is slower on Windows? Is that on ARM only probably? Can't imagine that being quicker on Linux / x86, with two server call roundtrips.
Slower on x86 Windows, haven’t tested ARM there yet, but would be interesting for comparison, especially since the flush should be no-op on x86.
In the inter-process case server round trip will be most likely the dominant factor as you said, but not too sure if performance regressions are too critical there, was more concerned about the intra-process case, where querying is fairly cheap.
IIRC some hooking frameworks use these APIs on themselves to “safely” read/write memory, but yeah letting it fail and unconditionally flush, would not be a performance regression on x86 in the normal case I think. Will do some testing on that.
On Tue Mar 5 16:05:10 2024 +0000, Jinoh Kang wrote:
NtWriteVirtualMemory: returns 0.
How is this possible?
No idea, I assume most of the initialized memory was 0 and some of the cache got invalidated when doing the syscall compared to the memcpy case.
On Tue Mar 5 16:46:12 2024 +0000, Marc-Aurel Zent wrote:
No idea, I assume most of the initialized memory was 0 and some of the cache got invalidated when doing the syscall compared to the memcpy case.
This is the code I used to test that (quickly thrown together), maybe it tests differently on different machines, but I get perfect consistency with it for the results above. ``` #include <windows.h> #include <cstdio>
typedef NTSTATUS(NTAPI* pNtWriteVirtualMemory)( HANDLE ProcessHandle, PVOID BaseAddress, VOID* Buffer, ULONG NumberOfBytesToWrite, PULONG NumberOfBytesWritten );
typedef int (*test_function)();
int main() { HMODULE ntdll = GetModuleHandleW(L"ntdll"); pNtWriteVirtualMemory NtWriteVirtualMemory = (pNtWriteVirtualMemory)GetProcAddress(ntdll, "NtWriteVirtualMemory");
void* execMemory = VirtualAlloc(NULL, 4096, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
unsigned char code[] = { // movz x0, #1, lsl #0 0x01, 0x00, 0x80, 0xD2, // ret 0xC0, 0x03, 0x5F, 0xD6 }; SIZE_T bytesWritten; ULONG bytesWritten2;
//memcpy(execMemory, code, sizeof(code)); NtWriteVirtualMemory(GetCurrentProcess(), (char*)execMemory , code , sizeof(code), &bytesWritten2); //WriteProcessMemory(GetCurrentProcess(), (char*)execMemory, code, sizeof(code), &bytesWritten);
//FlushInstructionCache(GetCurrentProcess(), execMemory, sizeof(code));
test_function func = (test_function)execMemory; int result = func(); printf("Function returned: %d\n", result);
VirtualFree(execMemory, 0, MEM_RELEASE); return 0; } ```