Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/tests/exception.c | 74 +++++++++++++++++++++++++++++++++ dlls/ntdll/unix/signal_x86_64.c | 13 ++++-- 2 files changed, 83 insertions(+), 4 deletions(-)
diff --git a/dlls/ntdll/tests/exception.c b/dlls/ntdll/tests/exception.c index 98187266d50..a592ca9cf3c 100644 --- a/dlls/ntdll/tests/exception.c +++ b/dlls/ntdll/tests/exception.c @@ -4917,6 +4917,79 @@ static void test_unwind_from_apc(void) ok(pass == 4, "Got unexpected pass %d.\n", pass); ok(test_unwind_apc_called, "Test user APC was not called.\n"); } + +static void test_syscall_clobbered_regs(void) +{ + struct regs + { + UINT64 rflags_before; + UINT64 rcx; + UINT64 r11; + }; + static const BYTE code[] = + { + 0xfd, /* std */ + 0x9c, /* pushf */ + 0x41, 0x8f, 0x00, /* pop (%r8) */ + 0x41, 0x50, /* push %r8 */ + 0x41, 0xff, 0xd1, /* callq *r9 */ + 0x41, 0x58, /* pop %r8 */ + 0x49, 0x89, 0x48, 0x08, /* mov %rcx,8(%r8) */ + 0x4d, 0x89, 0x58, 0x10, /* mov %r11,16(%r8) */ + 0xfc, /* cld */ + 0xc3, /* ret */ + }; + + static const unsigned int flags_mask = 0x844; /* ZF, PF, SF. */ + NTSTATUS (WINAPI *func)(void *arg1, void *arg2, struct regs *, void *call_addr); + NTSTATUS (WINAPI *pNtCancelTimer)(HANDLE, BOOLEAN *); + HMODULE hntdll = GetModuleHandleA("ntdll.dll"); + struct regs regs; + CONTEXT context; + NTSTATUS status; + + pNtCancelTimer = (void *)GetProcAddress(hntdll, "NtCancelTimer"); + ok(!!pNtCancelTimer, "NtCancelTimer not found.\n"); + memcpy(code_mem, code, sizeof(code)); + func = code_mem; + memset(®s, 0, sizeof(regs)); + status = func((HANDLE)0xdeadbeef, NULL, ®s, pNtCancelTimer); + ok(status == STATUS_INVALID_HANDLE, "Got unexpected status %#x.\n", status); + + /* After the syscall instruction rcx contains the address of the instruction next after syscall + * and r11 contains rflags before the instruction. rcx, r11 are set by syscall instruction itself, + * looks like Windows is preserving those (setting the the thread context with NtContinue during + * the syscall is the special case of course). + * ZF, PF, SF (0x44) may be potentially altered in the Nt syscall thunk. */ + ok(!((regs.rflags_before ^ regs.r11) & ~flags_mask), "Got unexpected rflags_before %s, r11 %s.\n", + wine_dbgstr_longlong(regs.rflags_before), wine_dbgstr_longlong(regs.r11)); + ok((BYTE *)regs.rcx > (BYTE *)pNtCancelTimer && (BYTE *)regs.rcx < (BYTE *)pNtCancelTimer + 0x20, + "Got unexpected rcx %s, pNtCancelTimer %p.\n", wine_dbgstr_longlong(regs.rcx), pNtCancelTimer); + + status = func((HANDLE)0xdeadbeef, (BOOLEAN *)0xdeadbeef, ®s, pNtCancelTimer); + ok(status == STATUS_ACCESS_VIOLATION, "Got unexpected status %#x.\n", status); + ok(!((regs.rflags_before ^ regs.r11) & ~flags_mask), "Got unexpected rflags_before %s, r11 %s.\n", + wine_dbgstr_longlong(regs.rflags_before), wine_dbgstr_longlong(regs.r11)); + ok((BYTE *)regs.rcx > (BYTE *)pNtCancelTimer && (BYTE *)regs.rcx < (BYTE *)pNtCancelTimer + 0x20, + "Got unexpected rcx %s, pNtCancelTimer %p.\n", wine_dbgstr_longlong(regs.rcx), pNtCancelTimer); + + context.ContextFlags = CONTEXT_CONTROL; + status = func(GetCurrentThread(), &context, ®s, pNtGetContextThread); + ok(status == STATUS_SUCCESS, "Got unexpected status %#x.\n", status); + ok(!((regs.rflags_before ^ regs.r11) & ~flags_mask), "Got unexpected rflags_before %s, r11 %s.\n", + wine_dbgstr_longlong(regs.rflags_before), wine_dbgstr_longlong(regs.r11)); + ok((BYTE *)regs.rcx > (BYTE *)pNtGetContextThread && (BYTE *)regs.rcx < (BYTE *)pNtGetContextThread + 0x20, + "Got unexpected rcx %s, pNtGetContextThread %p.\n", wine_dbgstr_longlong(regs.rcx), pNtGetContextThread); + + status = func(GetCurrentThread(), &context, ®s, pNtSetContextThread); + ok(status == STATUS_SUCCESS, "Got unexpected status %#x.\n", status); + /* For some reason bit 1 (documented as reserved, always 1) is cleared in r11 after NtSetContextThread() + * across all the Win versions. */ + ok(!(((regs.rflags_before & ~2) ^ regs.r11) & ~flags_mask), "Got unexpected rflags_before %s, r11 %s.\n", + wine_dbgstr_longlong(regs.rflags_before), wine_dbgstr_longlong(regs.r11)); + ok((BYTE *)regs.rcx > (BYTE *)pNtGetContextThread && (BYTE *)regs.rcx < (BYTE *)pNtGetContextThread + 0x20, + "Got unexpected rcx %s, pNtGetContextThread %p.\n", wine_dbgstr_longlong(regs.rcx), pNtGetContextThread); +} #elif defined(__arm__)
#define UNW_FLAG_NHANDLER 0 @@ -10737,6 +10810,7 @@ START_TEST(exception) test_extended_context(); test_copy_context(); test_unwind_from_apc(); + test_syscall_clobbered_regs();
#elif defined(__aarch64__)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 4310c1c6a2f..bd25172b309 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -3233,21 +3233,26 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq 0x28(%rcx),%rdi\n\t" "movq 0x20(%rcx),%rsi\n\t" "movq 0x08(%rcx),%rbx\n\t" + "movq 0x80(%rcx),%r11\n\t" /* frame->eflags */ "testl $0x3,%edx\n\t" /* CONTEXT_CONTROL | CONTEXT_INTEGER */ "jnz 1f\n\t" "movq 0x88(%rcx),%rsp\n\t" - "jmpq *0x70(%rcx)\n" /* frame->rip */ + "movq 0x70(%rcx),%rcx\n\t" /* frame->rip */ + "jmpq *%rcx\n\t" "1:\tleaq 0x70(%rcx),%rsp\n\t" "testl $0x2,%edx\n\t" /* CONTEXT_INTEGER */ - "jz 1f\n\t" - "movq 0x00(%rcx),%rax\n\t" + "jnz 1f\n\t" + "btr $1,%r11\n\t" + "movq (%rsp),%rcx\n\t" /* frame->rip */ + "iretq\n" + "1:\tmovq 0x00(%rcx),%rax\n\t" "movq 0x18(%rcx),%rdx\n\t" "movq 0x30(%rcx),%r8\n\t" "movq 0x38(%rcx),%r9\n\t" "movq 0x40(%rcx),%r10\n\t" "movq 0x48(%rcx),%r11\n\t" "movq 0x10(%rcx),%rcx\n" - "1:\tiretq\n" + "iretq\n" "5:\tmovl $0xc000000d,%edx\n\t" /* STATUS_INVALID_PARAMETER */ "movq %rsp,%rcx\n" __ASM_NAME("__wine_syscall_dispatcher_return") ":\n\t"
After some testing I found out that the patch's behaviour is inaccurate.
The attached test program does the following:
1. Set R10 to 0xdeadbeef5a5a5a5a and R11 to 0x0123456789ABCDEF. 2. Generate a page fault. 3. Set R10 to 0xcafebabea5a5a5a5 and R11 to 0xfedcba9876543210. 4. Issue a system call that pauses the current thread. 5. Switch to another thread, and dump the previous thread's registers. 6. Set all bits in EFLAGS to 1. (0xffffffffffffffff) 7. Dump the previous thread's registers again.
Its output on Windows 10 (20H2) is:
SharedUserData.SystemCall = 0000000000
Before set context: EFlags = 0x0000000000000246 R11 = 0x0123456789abcdef RIP = 0x00007ffa09e504d4 RCX = 0x0000000000000088 RSP = 0x0000000000ccfef8 R10 = 0xdeadbeef5a5a5a5a
After set context: EFlags = 0x0000000000210fd5 R11 = 0x0123456789abcdef RIP = 0x00007ffa09e504d4 RCX = 0x0000000000000088 RSP = 0x0000000000ccfef8 R10 = 0xdeadbeef5a5a5a5a
From this we can observe the following:
A. KiFastSystemCall doesn't clear bit 1 in R11 by itself. Rather, it's the job of NtSetContextThread.
B. KiFastSystemCall ignores registers clobbered by the SYSCALL instruction. It does try to pretend that the 1st argument is being passed to RCX, which leaves the actual 1st argument register (R11) unmodified in CONTEXT. (Also note that this implies the presence of a flag in the real kernel that records whether R10/R11 are set to valid values or not. Otherwise, the kernel would be unable to use SYSRET since R11 != RFLAGS, etc.)
C. Other entrances to kernel (e.g. a page fault) do record all registers. These values are preserved until the next time the thread switches to kernel mode.
Thanks for testing this. First of all, I did not try to simulate every bit of volatile registers clobbering performed by the kernel. Maybe my mistake here is rather touching r11 at all (while I mostly care about rcx now) or at least making those changes in a single patch. Yet your analysis might be quite helpful, although I can't say I could follow completely. My questions are inline.
On 11/30/21 19:56, Jinoh Kang wrote:
After some testing I found out that the patch's behaviour is inaccurate.
The attached test program does the following:
- Set R10 to 0xdeadbeef5a5a5a5a and R11 to 0x0123456789ABCDEF.
- Generate a page fault.
By page fault do you mean exactly page fault per se (transparently handled by kernel) or access violation? If that's the first not sure what exactly is it supposed to influence?
- Set R10 to 0xcafebabea5a5a5a5 and R11 to 0xfedcba9876543210.
Set where? If p. 2., concerns access violoation, do you mean in vectored handle? Or maybe I am not following completely.
- Issue a system call that pauses the current thread.
Which exactly do you mean, to be sure?
- Switch to another thread, and dump the previous thread's registers.
- Set all bits in EFLAGS to 1. (0xffffffffffffffff)
- Dump the previous thread's registers again.
Basically I think it would be much easier to follow if that was expressed in some sort of pseudocode naming specific functions / seh handlers etc.
Its output on Windows 10 (20H2) is:
SharedUserData.SystemCall = 0000000000
Before set context: EFlags = 0x0000000000000246 R11 = 0x0123456789abcdef RIP = 0x00007ffa09e504d4 RCX = 0x0000000000000088 RSP = 0x0000000000ccfef8 R10 = 0xdeadbeef5a5a5a5a
After set context: EFlags = 0x0000000000210fd5 R11 = 0x0123456789abcdef RIP = 0x00007ffa09e504d4 RCX = 0x0000000000000088 RSP = 0x0000000000ccfef8 R10 = 0xdeadbeef5a5a5a5a
From this we can observe the following:
A. KiFastSystemCall doesn't clear bit 1 in R11 by itself. Rather, it's the job of NtSetContextThread.
Yeah, regardless of how this is concluded from a test it looks like cleaner way to do it to me.
B. KiFastSystemCall ignores registers clobbered by the SYSCALL instruction. It does try to pretend that the 1st argument is being passed to RCX, which leaves the actual 1st argument register (R11) unmodified in CONTEXT. (Also note that this implies the presence of a flag in the real kernel that records whether R10/R11 are set to valid values or not. Otherwise, the kernel would be unable to use SYSRET since R11 != RFLAGS, etc.)
C. Other entrances to kernel (e.g. a page fault) do record all registers. These values are preserved until the next time the thread switches to kernel mode.
Again, it would be better to clarify what is meant by page fault but from this context I suspect you mean access violation (or other exception)? In Wine this is handled separately, the syscall_dispatcher() is not involved neither for entering the Unix part (that is done from signal) nor from continuing to user mode (that is done by setting registers from the signal handler and returning right to user mode ntdll entry point). Do you mean your test suggest some modifications to this part? That might be interesting to know but I didn't have an intention to cover all of that at once.
I am sorry, I actually somehow missed the actual test program thus all of my prior questions.
While you are probably right in your assessment it still seems to me that these aspects are a bit orthogonal to what I am trying to do. Wine does not handle page faults, host OS does that directly thus we can't track the influence of that on the context, nor I know any application which depends on that. That suggests though that trying to simulate r11 the way my patch does probably doesn't make much sense.
I think I will resend the patch without touching r11 at all.
Then, if I read your test correctly, WRT rcx handling, you show that the value in rcx stored in the context is not the one my test shows on syscall exit (the rcx in the context is supposed to be the first argument to syscall, right?). But my patch doesn't touch the rcx value in the context (and your test confirms that it should not).
On 11/30/21 20:22, Paul Gofman wrote:
Thanks for testing this. First of all, I did not try to simulate every bit of volatile registers clobbering performed by the kernel. Maybe my mistake here is rather touching r11 at all (while I mostly care about rcx now) or at least making those changes in a single patch. Yet your analysis might be quite helpful, although I can't say I could follow completely. My questions are inline.
On 11/30/21 19:56, Jinoh Kang wrote:
After some testing I found out that the patch's behaviour is inaccurate.
The attached test program does the following:
- Set R10 to 0xdeadbeef5a5a5a5a and R11 to 0x0123456789ABCDEF.
- Generate a page fault.
By page fault do you mean exactly page fault per se (transparently handled by kernel) or access violation? If that's the first not sure what exactly is it supposed to influence?
- Set R10 to 0xcafebabea5a5a5a5 and R11 to 0xfedcba9876543210.
Set where? If p. 2., concerns access violoation, do you mean in vectored handle? Or maybe I am not following completely.
- Issue a system call that pauses the current thread.
Which exactly do you mean, to be sure?
- Switch to another thread, and dump the previous thread's registers.
- Set all bits in EFLAGS to 1. (0xffffffffffffffff)
- Dump the previous thread's registers again.
Basically I think it would be much easier to follow if that was expressed in some sort of pseudocode naming specific functions / seh handlers etc.
Its output on Windows 10 (20H2) is:
SharedUserData.SystemCall = 0000000000
Before set context: EFlags = 0x0000000000000246 R11 = 0x0123456789abcdef RIP = 0x00007ffa09e504d4 RCX = 0x0000000000000088 RSP = 0x0000000000ccfef8 R10 = 0xdeadbeef5a5a5a5a
After set context: EFlags = 0x0000000000210fd5 R11 = 0x0123456789abcdef RIP = 0x00007ffa09e504d4 RCX = 0x0000000000000088 RSP = 0x0000000000ccfef8 R10 = 0xdeadbeef5a5a5a5a
From this we can observe the following:
A. KiFastSystemCall doesn't clear bit 1 in R11 by itself. Rather, it's the job of NtSetContextThread.
Yeah, regardless of how this is concluded from a test it looks like cleaner way to do it to me.
B. KiFastSystemCall ignores registers clobbered by the SYSCALL instruction. It does try to pretend that the 1st argument is being passed to RCX, which leaves the actual 1st argument register (R11) unmodified in CONTEXT. (Also note that this implies the presence of a flag in the real kernel that records whether R10/R11 are set to valid values or not. Otherwise, the kernel would be unable to use SYSRET since R11 != RFLAGS, etc.)
C. Other entrances to kernel (e.g. a page fault) do record all registers. These values are preserved until the next time the thread switches to kernel mode.
Again, it would be better to clarify what is meant by page fault but from this context I suspect you mean access violation (or other exception)? In Wine this is handled separately, the syscall_dispatcher() is not involved neither for entering the Unix part (that is done from signal) nor from continuing to user mode (that is done by setting registers from the signal handler and returning right to user mode ntdll entry point). Do you mean your test suggest some modifications to this part? That might be interesting to know but I didn't have an intention to cover all of that at once.
On 12/1/21 08:43, Paul Gofman wrote:
I am sorry, I actually somehow missed the actual test program thus all of my prior questions.
No worries.
While you are probably right in your assessment it still seems to me that these aspects are a bit orthogonal to what I am trying to do.
I agree. In fact I find these results surprising too, which I had no prior knowledge.
Wine does not handle page faults, host OS does that directly thus we can't track the influence of that on the context,
Yes, we can't achieve that without severe performance penalties.
nor I know any application which depends on that.
Hopefully...
That suggests though that trying to simulate r11 the way my patch does probably doesn't make much sense.
I think I will resend the patch without touching r11 at all.
Thanks!
Then, if I read your test correctly, WRT rcx handling, you show that the value in rcx stored in the context is not the one my test shows on syscall exit (the rcx in the context is supposed to be the first argument to syscall, right?).
Yes, KiFastSystemCall seems to internally reverse the effect of "mov r10, rcx" in the ntdll stub prologue (presumably for compatibility with INT 2Eh).
But my patch doesn't touch the rcx value in the context (and your test confirms that it should not).
You're right -- it shouldn't.
My speculation is that there's a flag that determines whether it's OK to clobber RCX/R11 on syscall exit. If it's enabled, KiFastSystemCall will use SYSRET instead of IRETQ. Issuing NtSetContextThread with CONTEXT_INTEGER on supposedly turns this flag off, disabling the use of SYSRET. From the observations so far, this flag more or less corresponds to CONTEXT_CONTROL in syscall_frame::restore_flags, but more testing is required...
On 11/30/21 20:22, Paul Gofman wrote:
Thanks for testing this. First of all, I did not try to simulate every bit of volatile registers clobbering performed by the kernel. Maybe my mistake here is rather touching r11 at all (while I mostly care about rcx now) or at least making those changes in a single patch. Yet your analysis might be quite helpful, although I can't say I could follow completely. My questions are inline.
On 11/30/21 19:56, Jinoh Kang wrote:
After some testing I found out that the patch's behaviour is inaccurate.
The attached test program does the following:
- Set R10 to 0xdeadbeef5a5a5a5a and R11 to 0x0123456789ABCDEF.
- Generate a page fault.
By page fault do you mean exactly page fault per se (transparently handled by kernel) or access violation? If that's the first not sure what exactly is it supposed to influence?
- Set R10 to 0xcafebabea5a5a5a5 and R11 to 0xfedcba9876543210.
Set where? If p. 2., concerns access violoation, do you mean in vectored handle? Or maybe I am not following completely.
- Issue a system call that pauses the current thread.
Which exactly do you mean, to be sure?
- Switch to another thread, and dump the previous thread's registers.
- Set all bits in EFLAGS to 1. (0xffffffffffffffff)
- Dump the previous thread's registers again.
Basically I think it would be much easier to follow if that was expressed in some sort of pseudocode naming specific functions / seh handlers etc.
Its output on Windows 10 (20H2) is:
SharedUserData.SystemCall = 0000000000
Before set context: EFlags = 0x0000000000000246 R11 = 0x0123456789abcdef RIP = 0x00007ffa09e504d4 RCX = 0x0000000000000088 RSP = 0x0000000000ccfef8 R10 = 0xdeadbeef5a5a5a5a
After set context: EFlags = 0x0000000000210fd5 R11 = 0x0123456789abcdef RIP = 0x00007ffa09e504d4 RCX = 0x0000000000000088 RSP = 0x0000000000ccfef8 R10 = 0xdeadbeef5a5a5a5a
From this we can observe the following:
A. KiFastSystemCall doesn't clear bit 1 in R11 by itself. Rather, it's the job of NtSetContextThread.
Yeah, regardless of how this is concluded from a test it looks like cleaner way to do it to me.
B. KiFastSystemCall ignores registers clobbered by the SYSCALL instruction. It does try to pretend that the 1st argument is being passed to RCX, which leaves the actual 1st argument register (R11) unmodified in CONTEXT. (Also note that this implies the presence of a flag in the real kernel that records whether R10/R11 are set to valid values or not. Otherwise, the kernel would be unable to use SYSRET since R11 != RFLAGS, etc.)
C. Other entrances to kernel (e.g. a page fault) do record all registers. These values are preserved until the next time the thread switches to kernel mode.
Again, it would be better to clarify what is meant by page fault but from this context I suspect you mean access violation (or other exception)? In Wine this is handled separately, the syscall_dispatcher() is not involved neither for entering the Unix part (that is done from signal) nor from continuing to user mode (that is done by setting registers from the signal handler and returning right to user mode ntdll entry point). Do you mean your test suggest some modifications to this part? That might be interesting to know but I didn't have an intention to cover all of that at once.
On 12/1/21 14:34, Jinoh Kang wrote:
My speculation is that there's a flag that determines whether it's OK to clobber RCX/R11 on syscall exit. If it's enabled, KiFastSystemCall will use SYSRET instead of IRETQ. Issuing NtSetContextThread with CONTEXT_INTEGER on supposedly turns this flag off, disabling the use of SYSRET. From the observations so far, this flag more or less corresponds to CONTEXT_CONTROL in
CONTEXT_INTEGER*. My apologies.
syscall_frame::restore_flags, but more testing is required...