-- v2: ntdll: Make sure NT flag is not set before iretq in wine_syscall_dispatcher_return on x64.
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/tests/exception.c | 65 ++++++++++++++++++++++++++++++++- dlls/ntdll/unix/signal_x86_64.c | 5 +++ 2 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/dlls/ntdll/tests/exception.c b/dlls/ntdll/tests/exception.c index 514ad218274..e196595b148 100644 --- a/dlls/ntdll/tests/exception.c +++ b/dlls/ntdll/tests/exception.c @@ -5736,6 +5736,41 @@ static void test_instrumentation_callback(void) 0x41, 0xff, 0xe2, /* jmp *r10 */ };
+ static const BYTE call_func_nt_flag[] = + { + 0x56, /* push %rsi */ + 0x57, /* push %rdi */ + 0x48, 0x89, 0xce, /* mov %rcx,%rsi */ + 0x48, 0x89, 0xd7, /* mov %rdx,%rdi */ + + 0x9c, /* pushfq */ + 0x9c, /* pushfq */ + 0x48, 0x81, 0x0c, 0x24, 0x00, 0x40, 0x00, 0x00, /* orq $0x4000,(%rsp) */ + 0x9d, /* popfq */ + + 0x41, 0xff, 0xd0, /* call *%r8 */ + + 0x4c, 0x89, 0x1e, /* mov %r11,(%rsi) */ + 0x9c, /* pushfq */ + 0x5a, /* popq %rdx */ + 0x9d, /* popfq */ + 0x48, 0x89, 0x17, /* mov %rdx,(%rdi) */ + 0x5f, /* pop %rdi */ + 0x5e, /* pop %rsi */ + 0xc3, /* ret */ + }; + + static const BYTE call_NtSetContextThread_nt_flag[] = + { + 0x9c, /* pushfq */ + 0x48, 0x81, 0x0c, 0x24, 0x00, 0x40, 0x00, 0x00, /* orq $0x4000,(%rsp) */ + 0x9d, /* popfq */ + 0x41, 0xff, 0xd0, /* call *%r8 */ + 0xc3, /* ret */ + }; + + NTSTATUS (WINAPI *func_NtSetContextThread_nt_flag)(HANDLE, CONTEXT *, void *func); + NTSTATUS (WINAPI *func_nt_flag)(UINT64 *ret_r11, UINT64 *ret_rflags, void *func); struct instrumentation_callback_data curr_data, data; PROCESS_INSTRUMENTATION_CALLBACK_INFORMATION info; HMODULE ntdll = GetModuleHandleA( "ntdll.dll" ); @@ -5743,14 +5778,27 @@ static void test_instrumentation_callback(void) EXCEPTION_RECORD record; void *vectored_handler; unsigned int i, count; + ULONG64 r11, rflags; NTSTATUS status; HANDLE thread; CONTEXT ctx; HWND hwnd; LONG pass;
+ if (is_arm64ec) return;
+ func_nt_flag = (void *)((char *)code_mem + 512); + memcpy( func_nt_flag, call_func_nt_flag, sizeof(call_func_nt_flag) ); + + func_NtSetContextThread_nt_flag = (void *)((char *)code_mem + 1024); + memcpy( func_NtSetContextThread_nt_flag, call_NtSetContextThread_nt_flag, sizeof(call_NtSetContextThread_nt_flag) ); + + status = func_nt_flag( &r11, &rflags, NtFlushProcessWriteBuffers ); + ok( !status, "got %#lx.\n", status ); + ok( r11 & 0x4000, "got %#I64x.\n", r11 ); + ok( rflags & 0x4000, "got %#I64x.\n", rflags ); + memcpy( code_mem, instrumentation_callback, sizeof(instrumentation_callback) ); *(void **)((char *)code_mem + 4) = &curr_data.call_count; *(void **)((char *)code_mem + 26) = curr_data.call_data; @@ -5790,6 +5838,14 @@ static void test_instrumentation_callback(void) ok( status == STATUS_SUCCESS, "got %#lx.\n", status ); ok( data.call_count == 1, "got %u.\n", data.call_count );
+ init_instrumentation_data( &curr_data ); + status = func_nt_flag( &r11, &rflags, NtFlushProcessWriteBuffers ); + data = curr_data; + ok( !status, "got %#lx.\n", status ); + ok( data.call_count == 1, "got %u.\n", data.call_count ); + ok( r11 & 0x4000, "got %#I64x.\n", r11 ); + ok( rflags & 0x4000, "got %#I64x.\n", rflags ); + vectored_handler = AddVectoredExceptionHandler( TRUE, test_instrumentation_callback_handler ); ok( !!vectored_handler, "failed.\n" ); init_instrumentation_data( &curr_data ); @@ -5835,8 +5891,15 @@ static void test_instrumentation_callback(void) ok( data.call_count == 1, "got %u.\n", data.call_count ); ok( data.call_data[0].r10 == (void *)ctx.Rip, "got %p, expected %p.\n", data.call_data[0].r10, (void *)ctx.Rip ); init_instrumentation_data( &curr_data ); + func_NtSetContextThread_nt_flag( GetCurrentThread(), &ctx, NtSetContextThread ); + } + else if (pass == 6) + { + data = curr_data; + pRtlCaptureContext( &ctx ); + ok( !(ctx.EFlags & 0x4000), "got %#lx.\n", ctx.EFlags ); } - ok( pass == 5, "got %ld.\n", pass ); + ok( pass == 6, "got %ld.\n", pass ); RemoveVectoredExceptionHandler( vectored_handler );
apc_count = 0; diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 64494de55f4..3c9b3e9026b 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2946,6 +2946,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq (%rsp),%rcx\n\t" /* frame->rip */ "pushq %r11\n\t" /* make sure that if trap flag is set the trap happens on the first instruction after iret */ + "andq $~0x4000,(%rsp)\n\t" /* make sure NT flag is not set, or iretq will fault */ "popfq\n\t" "iretq\n" /* CONTEXT_INTEGER */ @@ -2956,6 +2957,9 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq 0x40(%rcx),%r10\n\t" "movq 0x48(%rcx),%r11\n\t" "movq 0x10(%rcx),%rcx\n\t" + "pushfq\n\t" + "andq $~0x4000,(%rsp)\n\t" /* make sure NT flag is not set, or iretq will fault */ + "popfq\n\t" "iretq\n" /* RESTORE_FLAGS_INSTRUMENTATION */ "2:\tmovq %gs:0x348,%r10\n\t" /* amd64_thread_data()->instrumentation_callback */ @@ -2967,6 +2971,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "xchgq %r10,(%rsp)\n\t" "pushq %r11\n\t" /* make sure that if trap flag is set the trap happens on the first instruction after iret */ + "andq $~0x4000,(%rsp)\n\t" /* make sure NT flag is not set, or iretq will fault */ "popfq\n\t" "iretq\n\t"
v2: - don't set NT flag in context in the test.
On Fri May 2 11:47:42 2025 +0000, Jinoh Kang wrote:
Probably doesn't matter short term, just curious if I was getting it right, since !283 would make clear-NT-at-iret redundant if my hypothesis is correct and possibly closer to native even. In any case, thanks for your work!
'iret' path hit in wine_syscall_dispatcher in the game is due to instrumentation callback (at least the case it was initially failing, not sure if it does that with NtSetContextThread or not somewhere later, I suppose it is better and not too hard to have all those paths fixed at once).
Probably doesn't matter short term, just curious if I was getting it right, since !283 would make clear-NT-at-iret redundant if my hypothesis is correct and possibly closer to native even.
I don't think this is the case. NT flag in the context is totally irrelevant for this MR (I realized that setting it in test in ctx.EFlags is misleading for this MR and only complicates the test without a reason, so I removed it in v2). Having NT flag set for the machine frame iret restores to is fine in terms that it doesn't fault now (your test shows that). The problem concerned here is when MR is set if eflags on syscall entry, then, iret will fault regardless of whether the flag is set or not in the machine frame.
As for native, I am guessing more likely real OS should necessarily sanitize the flags for its own execution on syscall entry while storing the original. Then, whether it returns with sysret (which is meant for basic syscall return but we can't use it because it is privileged) or maybe also iret for special cases (Linux does so), it will restore the original flags of course. We could in theory just do the same (sanitize on entry after saving original), but mind 'popf' before 'iret' (introduced by 50481d06d2763b02793f8d5362b512015987011f and required for proper single step exception address) which requires clearing NT flag before 'popf' anyway.
On Fri May 2 14:40:07 2025 +0000, Paul Gofman wrote:
'iret' path hit in wine_syscall_dispatcher in the game is due to instrumentation callback (at least the case it was initially failing, not sure if it does that with NtSetContextThread or not somewhere later, I suppose it is better and not too hard to have all those paths fixed at once).
Probably doesn't matter short term, just curious if I was getting it
right, since !283 would make clear-NT-at-iret redundant if my hypothesis is correct and possibly closer to native even. I don't think this is the case. NT flag in the context is totally irrelevant for this MR (I realized that setting it in test in ctx.EFlags is misleading for this MR and only complicates the test without a reason, so I removed it in v2). Having NT flag set for the machine frame iret restores to is fine in terms that it doesn't fault now (your test shows that). The problem concerned here is when MR is set if eflags on syscall entry, then, iret will fault regardless of whether the flag is set or not in the machine frame. As for native, I am guessing more likely real OS should necessarily sanitize the flags for its own execution on syscall entry while storing the original. Then, whether it returns with sysret (which is meant for basic syscall return but we can't use it because it is privileged) or maybe also iret for special cases (Linux does so), it will restore the original flags of course. We could in theory just do the same (sanitize on entry after saving original), but mind 'popf' before 'iret' (introduced by 50481d06d2763b02793f8d5362b512015987011f and required for proper single step exception address) which requires clearing NT flag before 'popf' anyway.
Actually, on real OS with syscall entry by 'syscall', syscall instruction clears flags itself with mask set by kernel in MSR.IA32_FMASK, most likely NT flag should just be there and be cleared this way (with original flags saved in r11 for later restore).
'iret' path hit in wine_syscall_dispatcher in the game is due to instrumentation callback
Thanks for clarifying.
Having NT flag set for the machine frame iret restores to is fine in terms that it doesn't fault now (your test shows that).
Right. I also realize that I was making an incorrect assumption that the problem is the NT flag is in the machine frame that iret restores *to*; in fact, as you say, iret faults when the *current* EFLAGS.NT is 1.
Jinoh Kang (@iamahuman) commented about dlls/ntdll/tests/exception.c:
ok( data.call_count == 1, "got %u.\n", data.call_count ); ok( data.call_data[0].r10 == (void *)ctx.Rip, "got %p, expected %p.\n", data.call_data[0].r10, (void *)ctx.Rip ); init_instrumentation_data( &curr_data );
func_NtSetContextThread_nt_flag( GetCurrentThread(), &ctx, NtSetContextThread );
(nit) for consistency with cases above. I tend to think it makes it easier to understand the test.
```suggestion:-0+0 func_NtSetContextThread_nt_flag( GetCurrentThread(), &ctx, NtSetContextThread ); ok( 0, "Shouldn't be reached.\n" ); ```
Jinoh Kang (@iamahuman) commented about dlls/ntdll/tests/exception.c:
EXCEPTION_RECORD record; void *vectored_handler; unsigned int i, count;
ULONG64 r11, rflags; NTSTATUS status; HANDLE thread; CONTEXT ctx; HWND hwnd; LONG pass;
(nit) gratuitous newline?
```suggestion:-0+0 ```
This merge request was approved by Jinoh Kang.
A few nits but probably shouldn't block the patch.
in fact, as you say, iret faults when the _current_ EFLAGS.NT is 1.
yes, exactly.