-- v4: ntdll: Inline __wine_unix_call_fast in the dispatcher. winecrt0: Inline PE __wine_unix_call(_fast) function calls. ntdll: Avoid double indirection to get syscall_frame pointer.
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 89 ++++++++++++++++++------------- dlls/ntdll/unix/signal_x86_64.c | 93 ++++++++++++++++++++------------- 2 files changed, 111 insertions(+), 71 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index cc8605c2a4f..91c4ec3d0eb 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2570,8 +2570,10 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "shrl $8,%ebx\n\t" "andl $0x30,%ebx\n\t" /* syscall table number */ "addl 0x38(%ecx),%ebx\n\t" /* frame->syscall_table */ + + "\n.L__wine_syscall_dispatcher_save_fpu:\n\t" "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ - "jz 2f\n\t" + "jz .L__wine_syscall_dispatcher_no_xsave\n\t" "movl $7,%eax\n\t" "xorl %edx,%edx\n\t" "movl %edx,0x240(%ecx)\n\t" @@ -2581,7 +2583,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl %edx,0x250(%ecx)\n\t" "movl %edx,0x254(%ecx)\n\t" "testl $2,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_XSAVEC */ - "jz 1f\n\t" + "jz .L__wine_syscall_dispatcher_no_xsavec\n\t" "movl %edx,0x258(%ecx)\n\t" "movl %edx,0x25c(%ecx)\n\t" "movl %edx,0x260(%ecx)\n\t" @@ -2593,20 +2595,25 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl %edx,0x278(%ecx)\n\t" "movl %edx,0x27c(%ecx)\n\t" "xsavec 0x40(%ecx)\n\t" - "jmp 4f\n" - "1:\txsave 0x40(%ecx)\n\t" - "jmp 4f\n" - "2:\ttestl $4,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_FXSAVE */ - "jz 3f\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_xsavec:\n\t" + "xsave 0x40(%ecx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_xsave:\n\t" + "testl $4,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_FXSAVE */ + "jz .L__wine_syscall_dispatcher_no_fxsave\n\t" "fxsave 0x40(%ecx)\n\t" - "jmp 4f\n" - "3:\tfnsave 0x40(%ecx)\n\t" - "fwait\n" - "4:\tmovl %ecx,%esp\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_fxsave:\n\t" + "fnsave 0x40(%ecx)\n\t" + "fwait\n\t" + + "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" + "movl %ecx,%esp\n\t" "movl 0x1c(%esp),%edx\n\t" /* frame->eax */ 
"andl $0xfff,%edx\n\t" /* syscall number */ "cmpl 8(%ebx),%edx\n\t" /* table->ServiceLimit */ - "jae 6f\n\t" + "jae .L__wine_syscall_dispatcher_invalid_arg\n\t" "movl 12(%ebx),%eax\n\t" /* table->ArgumentTable */ "movzbl (%eax,%edx,1),%ecx\n\t" "movl (%ebx),%eax\n\t" /* table->ServiceTable */ @@ -2617,8 +2624,9 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "cld\n\t" "rep; movsl\n\t" "call *(%eax,%edx,4)\n\t" - "leal -0x34(%ebp),%esp\n" - "5:\t" + "leal -0x34(%ebp),%esp\n\t" + + "\n.L__wine_syscall_dispatcher_restore:\n\t" __ASM_CFI_CFA_IS_AT1(esp, 0x0c) __ASM_CFI_REG_IS_AT1(esp, esp, 0x0c) __ASM_CFI_REG_IS_AT1(eip, esp, 0x08) @@ -2628,22 +2636,28 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI_REG_IS_AT1(ebp, esp, 0x34) "movl 0(%esp),%ecx\n\t" /* frame->syscall_flags + (frame->restore_flags << 16) */ "testl $0x68 << 16,%ecx\n\t" /* CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS | CONTEXT_XSAVE */ - "jz 3f\n\t" + "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" + + "\n.L__wine_syscall_dispatcher_restore_fpu:\n\t" "testl $3,%ecx\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ - "jz 1f\n\t" + "jz .L__wine_syscall_dispatcher_no_xrstor\n\t" "movl %eax,%esi\n\t" "movl $7,%eax\n\t" "xorl %edx,%edx\n\t" "xrstor 0x40(%esp)\n\t" "movl %esi,%eax\n\t" - "jmp 3f\n" - "1:\ttestl $4,%ecx\n\t" /* SYSCALL_HAVE_FXSAVE */ - "jz 2f\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" + "\n.L__wine_syscall_dispatcher_no_xrstor:\n\t" + "testl $4,%ecx\n\t" /* SYSCALL_HAVE_FXSAVE */ + "jz .L__wine_syscall_dispatcher_no_fxrstor\n\t" "fxrstor 0x40(%esp)\n\t" - "jmp 3f\n" - "2:\tfrstor 0x40(%esp)\n\t" - "fwait\n" - "3:\tmovl 0x2c(%esp),%edi\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" + "\n.L__wine_syscall_dispatcher_no_fxrstor:\n\t" + "frstor 0x40(%esp)\n\t" + "fwait\n\t" + + "\n.L__wine_syscall_dispatcher_fpu_restored:\n\t" + "movl 0x2c(%esp),%edi\n\t" __ASM_CFI(".cfi_remember_state\n\t") __ASM_CFI(".cfi_same_value %edi\n\t") "movl 
0x30(%esp),%esi\n\t" @@ -2651,7 +2665,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl 0x34(%esp),%ebp\n\t" __ASM_CFI(".cfi_same_value %ebp\n\t") "testl $0x7 << 16,%ecx\n\t" /* CONTEXT_CONTROL | CONTEXT_SEGMENTS | CONTEXT_INTEGER */ - "jnz 1f\n\t" + "jnz .L__wine_syscall_dispatcher_restore_integer\n\t" "movl 0x20(%esp),%ebx\n\t" __ASM_CFI(".cfi_remember_state\n\t") __ASM_CFI(".cfi_same_value %ebx\n\t") @@ -2659,14 +2673,17 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI(".cfi_register %eip, %ecx\n\t") "movl 0x0c(%esp),%esp\n\t" /* frame->esp */ __ASM_CFI(".cfi_same_value %esp\n\t") - "jmpl *%ecx\n" - __ASM_CFI("\t.cfi_restore_state\n") - "1:\ttestl $0x2 << 16,%ecx\n\t" /* CONTEXT_INTEGER */ - "jz 1f\n\t" + "jmpl *%ecx\n\t" + __ASM_CFI("\t.cfi_restore_state\n\t") + + "\n.L__wine_syscall_dispatcher_restore_integer:\n\t" + "testl $0x2 << 16,%ecx\n\t" /* CONTEXT_INTEGER */ + "jz .L__wine_syscall_dispatcher_restore_control\n\t" "movl 0x1c(%esp),%eax\n\t" "movl 0x24(%esp),%ecx\n\t" - "movl 0x28(%esp),%edx\n" - "1:\tmovl 0x0c(%esp),%ebx\n\t" /* frame->esp */ + "movl 0x28(%esp),%edx\n\t" + "\n.L__wine_syscall_dispatcher_restore_control:\n\t" + "movl 0x0c(%esp),%ebx\n\t" /* frame->esp */ __ASM_CFI(".cfi_register %esp, %ebx\n\t") "movw 0x12(%esp),%ss\n\t" "xchgl %ebx,%esp\n\t" @@ -2690,10 +2707,12 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI(".cfi_same_value %ebx\n\t") "popl %ds\n\t" __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") - "iret\n" - __ASM_CFI("\t.cfi_restore_state\n") - "6:\tmovl $0xc000000d,%eax\n\t" /* STATUS_INVALID_PARAMETER */ - "jmp 5b\n\t" + "iret\n\t" + __ASM_CFI("\t.cfi_restore_state\n\t") + + "\n.L__wine_syscall_dispatcher_invalid_arg:\n\t" + "movl $0xc000000d,%eax\n\t" /* STATUS_INVALID_PARAMETER */ + "jmp .L__wine_syscall_dispatcher_restore\n\t" ".globl " __ASM_NAME("__wine_syscall_dispatcher_return") "\n" __ASM_NAME("__wine_syscall_dispatcher_return") ":\n\t" __ASM_CFI(".cfi_remember_state\n\t") @@ -2707,7 +2726,7 @@ 
__ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl 8(%esp),%eax\n\t" "movl 4(%esp),%esp\n\t" __ASM_CFI(".cfi_restore_state\n\t") - "jmp 5b" ) + "jmp .L__wine_syscall_dispatcher_restore\n\t" )
/*********************************************************************** diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index cc070dda5ae..e98c0299b80 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2648,28 +2648,34 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, * depends on us returning to it. Adjust the return address accordingly. */ "subq $0xb,0x70(%rcx)\n\t" "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ + + "\n.L__wine_syscall_dispatcher_save_fpu:\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ - "jz 2f\n\t" + "jz .L__wine_syscall_dispatcher_no_xsave\n\t" "movl $7,%eax\n\t" "xorl %edx,%edx\n\t" "movq %rdx,0x2c0(%rcx)\n\t" "movq %rdx,0x2c8(%rcx)\n\t" "movq %rdx,0x2d0(%rcx)\n\t" "testl $2,%r14d\n\t" /* SYSCALL_HAVE_XSAVEC */ - "jz 1f\n\t" + "jz .L__wine_syscall_dispatcher_no_xsavec\n\t" "movq %rdx,0x2d8(%rcx)\n\t" "movq %rdx,0x2e0(%rcx)\n\t" "movq %rdx,0x2e8(%rcx)\n\t" "movq %rdx,0x2f0(%rcx)\n\t" "movq %rdx,0x2f8(%rcx)\n\t" "xsavec64 0xc0(%rcx)\n\t" - "jmp 3f\n" - "1:\txsave64 0xc0(%rcx)\n\t" - "jmp 3f\n" - "2:\tfxsave64 0xc0(%rcx)\n" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_xsavec:\n\t" + "xsave64 0xc0(%rcx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_xsave:\n\t" + "fxsave64 0xc0(%rcx)\n\t" + + "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" /* remember state when $rcx is pointing to "frame" */ __ASM_CFI(".cfi_remember_state\n\t") - "3:\tleaq 0x98(%rcx),%rbp\n\t" + "leaq 0x98(%rcx),%rbp\n\t" __ASM_CFI_CFA_IS_AT1(rbp, 0x70) __ASM_CFI_REG_IS_AT1(rsp, rbp, 0x70) __ASM_CFI_REG_IS_AT1(rip, rbp, 0x58) @@ -2683,17 +2689,18 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI_REG_IS_AT1(rbp, rbp, 0x00) #ifdef __linux__ "testl $12,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB | SYSCALL_HAVE_WRFSGSBASE */ - "jz 2f\n\t" + "jz .L__wine_syscall_dispatcher_fsgs_swapped\n\t" "movq 
%gs:0x330,%rsi\n\t" /* amd64_thread_data()->pthread_teb */ "testl $8,%r14d\n\t" /* SYSCALL_HAVE_WRFSGSBASE */ - "jz 1f\n\t" + "jz .L__wine_syscall_dispatcher_no_wrfsbase\n\t" "wrfsbase %rsi\n\t" - "jmp 2f\n" - "1:\tmov $0x1002,%edi\n\t" /* ARCH_SET_FS */ + "jmp .L__wine_syscall_dispatcher_fsgs_swapped\n\t" + "\n.L__wine_syscall_dispatcher_no_wrfsbase:\n\t" + "mov $0x1002,%edi\n\t" /* ARCH_SET_FS */ "mov $158,%eax\n\t" /* SYS_arch_prctl */ "syscall\n\t" - "leaq -0x98(%rbp),%rcx\n" - "2:\n\t" + "leaq -0x98(%rbp),%rcx\n\t" + "\n.L__wine_syscall_dispatcher_fsgs_swapped:\n\t" #endif "leaq 0x28(%rsp),%rsi\n\t" /* first argument */ "movq %rcx,%rsp\n\t" @@ -2706,44 +2713,52 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "leaq (%rcx,%rbx,2),%rbx\n\t" "andl $0xfff,%eax\n\t" /* syscall number */ "cmpq 16(%rbx),%rax\n\t" /* table->ServiceLimit */ - "jae 5f\n\t" + "jae .L__wine_syscall_dispatcher_invalid_arg\n\t" "movq 24(%rbx),%rcx\n\t" /* table->ArgumentTable */ "movzbl (%rcx,%rax),%ecx\n\t" "subq $0x20,%rcx\n\t" - "jbe 1f\n\t" + "jbe .L__wine_syscall_dispatcher_args_copied\n\t" "subq %rcx,%rsp\n\t" "shrq $3,%rcx\n\t" "andq $~15,%rsp\n\t" "movq %rsp,%rdi\n\t" "cld\n\t" - "rep; movsq\n" - "1:\tmovq %r10,%rcx\n\t" + "rep; movsq\n\t" + "\n.L__wine_syscall_dispatcher_args_copied:\n\t" + "movq %r10,%rcx\n\t" "subq $0x20,%rsp\n\t" "movq (%rbx),%r10\n\t" /* table->ServiceTable */ "callq *(%r10,%rax,8)\n\t" - "leaq -0x98(%rbp),%rcx\n" + "leaq -0x98(%rbp),%rcx\n\t" /* $rcx is now pointing to "frame" again */ __ASM_CFI(".cfi_restore_state\n\t") - "2:\tmovl 0x94(%rcx),%edx\n\t" /* frame->restore_flags */ + + "\n.L__wine_syscall_dispatcher_restore:\n\t" + "movl 0x94(%rcx),%edx\n\t" /* frame->restore_flags */ #ifdef __linux__ "testl $12,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB | SYSCALL_HAVE_WRFSGSBASE */ - "jz 1f\n\t" - "movw 0x7e(%rcx),%fs\n" - "1:\n\t" + "jz .L__wine_syscall_dispatcher_fsgs_restored\n\t" + "movw 0x7e(%rcx),%fs\n\t" + "\n.L__wine_syscall_dispatcher_fsgs_restored:\n\t" 
#endif "testl $0x48,%edx\n\t" /* CONTEXT_FLOATING_POINT | CONTEXT_XSTATE */ - "jz 4f\n\t" + "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" + + "\n.L__wine_syscall_dispatcher_restore_fpu:\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ - "jz 3f\n\t" + "jz .L__wine_syscall_dispatcher_no_xrstor\n\t" "movq %rax,%r11\n\t" "movl $7,%eax\n\t" "xorl %edx,%edx\n\t" "xrstor64 0xc0(%rcx)\n\t" "movq %r11,%rax\n\t" "movl 0x94(%rcx),%edx\n\t" - "jmp 4f\n" - "3:\tfxrstor64 0xc0(%rcx)\n" - "4:\tmovq 0x98(%rcx),%rbp\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" + "\n.L__wine_syscall_dispatcher_no_xrstor:\n\t" + "fxrstor64 0xc0(%rcx)\n\t" + + "\n.L__wine_syscall_dispatcher_fpu_restored:\n\t" + "movq 0x98(%rcx),%rbp\n\t" __ASM_CFI(".cfi_same_value rbp\n\t") "movq 0x68(%rcx),%r15\n\t" __ASM_CFI(".cfi_same_value r15\n\t") @@ -2760,7 +2775,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq 0x08(%rcx),%rbx\n\t" __ASM_CFI(".cfi_same_value rbx\n\t") "testl $0x3,%edx\n\t" /* CONTEXT_CONTROL | CONTEXT_INTEGER */ - "jnz 1f\n\t" + "jnz .L__wine_syscall_dispatcher_restore_control\n\t" __ASM_CFI(".cfi_remember_state\n\t") "movq 0x80(%rcx),%r11\n\t" /* frame->eflags */ "pushq %r11\n\t" @@ -2775,25 +2790,29 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI(".cfi_restore_state\n\t") /* remember state when $rcx is pointing to "frame" */ __ASM_CFI(".cfi_remember_state\n\t") - "1:\tleaq 0x70(%rcx),%rsp\n\t" + + "\n.L__wine_syscall_dispatcher_restore_control:\n\t" + "leaq 0x70(%rcx),%rsp\n\t" __ASM_CFI_CFA_IS_AT1(rsp, 0x18) __ASM_CFI_REG_IS_AT1(rsp, rsp, 0x18) __ASM_CFI_REG_IS_AT1(rip, rsp, 0x00) "testl $0x2,%edx\n\t" /* CONTEXT_INTEGER */ - "jnz 1f\n\t" + "jnz .L__wine_syscall_dispatcher_restore_integer\n\t" "movq 0x10(%rsp),%r11\n\t" /* frame->eflags */ "movq (%rsp),%rcx\n\t" /* frame->rip */ __ASM_CFI(".cfi_register rip, rcx\n\t") - "iretq\n" + "iretq\n\t" __ASM_CFI_REG_IS_AT1(rip, rsp, 0x00) - "1:\tmovq 0x00(%rcx),%rax\n\t" + + 
"\n.L__wine_syscall_dispatcher_restore_integer:\n\t" + "movq 0x00(%rcx),%rax\n\t" "movq 0x18(%rcx),%rdx\n\t" "movq 0x30(%rcx),%r8\n\t" "movq 0x38(%rcx),%r9\n\t" "movq 0x40(%rcx),%r10\n\t" "movq 0x48(%rcx),%r11\n\t" - "movq 0x10(%rcx),%rcx\n" - "iretq\n" + "movq 0x10(%rcx),%rcx\n\t" + "iretq\n\t" __ASM_CFI_CFA_IS_AT1(rbp, 0x70) __ASM_CFI_REG_IS_AT1(rsp, rbp, 0x70) __ASM_CFI_REG_IS_AT1(rip, rbp, 0x58) @@ -2805,7 +2824,9 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI_REG_IS_AT1(r14, rbp, 0x48) __ASM_CFI_REG_IS_AT1(r15, rbp, 0x50) __ASM_CFI_REG_IS_AT1(rbp, rbp, 0x00) - "5:\tmovl $0xc000000d,%edx\n\t" /* STATUS_INVALID_PARAMETER */ + + "\n.L__wine_syscall_dispatcher_invalid_arg:\n\t" + "movl $0xc000000d,%edx\n\t" /* STATUS_INVALID_PARAMETER */ "movq %rsp,%rcx\n\t" /* $rcx is now pointing to "frame" again */ __ASM_CFI(".cfi_restore_state\n\t") @@ -2813,7 +2834,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_NAME("__wine_syscall_dispatcher_return") ":\n\t" "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ "movq %rdx,%rax\n\t" - "jmp 2b" ) + "jmp .L__wine_syscall_dispatcher_restore\n\t" )
/***********************************************************************
From: Rémi Bernon rbernon@codeweavers.com
The flag is stored in the highest bit (bit 31) of the syscall id generated by winebuild. --- tools/winebuild/build.h | 1 + tools/winebuild/import.c | 1 + tools/winebuild/parser.c | 6 ++++++ tools/winebuild/winebuild.man.in | 4 ++++ 4 files changed, 12 insertions(+)
diff --git a/tools/winebuild/build.h b/tools/winebuild/build.h index 576304cd2b8..216f6859fe7 100644 --- a/tools/winebuild/build.h +++ b/tools/winebuild/build.h @@ -189,6 +189,7 @@ static inline int is_pe(void) #define FLAG_FASTCALL 0x0100 /* function uses fastcall calling convention */ #define FLAG_SYSCALL 0x0200 /* function is a system call */ #define FLAG_IMPORT 0x0400 /* export is imported from another module */ +#define FLAG_NOFPU 0x0800 /* function skips default syscall FPU save / restore */
#define FLAG_FORWARD 0x1000 /* function is a forwarded name */ #define FLAG_EXT_LINK 0x2000 /* function links to an external symbol */ diff --git a/tools/winebuild/import.c b/tools/winebuild/import.c index 9f87adcf593..18ca6eaef01 100644 --- a/tools/winebuild/import.c +++ b/tools/winebuild/import.c @@ -1385,6 +1385,7 @@ void output_syscalls( DLLSPEC *spec ) ORDDEF *odp = syscalls[i]; const char *name = get_link_name(odp); unsigned int id = (spec->syscall_table << 12) + i; + if (odp->flags & FLAG_NOFPU) id |= (1u << 31);
output( "\t.align %d\n", get_alignment(16) ); output( "\t%s\n", func_declaration(name) ); diff --git a/tools/winebuild/parser.c b/tools/winebuild/parser.c index 11b422bd435..0a1fbe52e62 100644 --- a/tools/winebuild/parser.c +++ b/tools/winebuild/parser.c @@ -71,6 +71,7 @@ static const char * const FlagNames[] = "fastcall", /* FLAG_FASTCALL */ "syscall", /* FLAG_SYSCALL */ "import", /* FLAG_IMPORT */ + "nofpu", /* FLAG_NOFPU */ NULL };
@@ -328,6 +329,11 @@ static int parse_spec_arguments( ORDDEF *odp, DLLSPEC *spec, int optional ) return 0; } } + else if (odp->flags & FLAG_NOFPU) + { + error( "The nofpu flag can only be used on a syscall function\n" ); + return 0; + } return 1; }
diff --git a/tools/winebuild/winebuild.man.in b/tools/winebuild/winebuild.man.in index 23ef3495744..ce016b542fb 100644 --- a/tools/winebuild/winebuild.man.in +++ b/tools/winebuild/winebuild.man.in @@ -324,6 +324,10 @@ only). The entry point will be exported by ordinal instead of by name. The name is still available for importing. .TP +.B -nofpu +The entry point is an NT syscall which does not save or restore the +FPU state and assumes it will not be modified by the call. +.TP .B -ret16 The function returns a 16-bit value (Win16 only). .TP
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/ntdll.spec | 1 + dlls/ntdll/unix/loader.c | 1 + dlls/winecrt0/unix_lib.c | 12 ++++++++++++ include/wine/unixlib.h | 1 + 4 files changed, 15 insertions(+)
diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec index 1d4218e7b52..750c214ec2a 100644 --- a/dlls/ntdll/ntdll.spec +++ b/dlls/ntdll/ntdll.spec @@ -1690,6 +1690,7 @@ @ cdecl -syscall wine_server_handle_to_fd(long long ptr ptr)
# Unix interface +@ stdcall -syscall -nofpu __wine_unix_call_fast(int64 long ptr) @ stdcall -syscall __wine_unix_call(int64 long ptr) @ stdcall -syscall __wine_unix_spawnvp(long ptr) @ stdcall __wine_ctrl_routine(ptr) diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c index 8a6200c7454..8d9fc8f34b9 100644 --- a/dlls/ntdll/unix/loader.c +++ b/dlls/ntdll/unix/loader.c @@ -356,6 +356,7 @@ static void * const syscalls[] = NtYieldExecution, __wine_dbg_write, __wine_unix_call, + __wine_unix_call /* __wine_unix_call_fast */, __wine_unix_spawnvp, wine_nt_to_unix_file_name, wine_server_call, diff --git a/dlls/winecrt0/unix_lib.c b/dlls/winecrt0/unix_lib.c index c86897b9905..816be88edcc 100644 --- a/dlls/winecrt0/unix_lib.c +++ b/dlls/winecrt0/unix_lib.c @@ -30,6 +30,7 @@ #include "wine/unixlib.h"
static NTSTATUS (WINAPI *p__wine_unix_call)( unixlib_handle_t, unsigned int, void * ); +static NTSTATUS (WINAPI *p__wine_unix_call_fast)( unixlib_handle_t, unsigned int, void * );
static void load_func( void **func, const char *name, void *def ) { @@ -47,10 +48,21 @@ static NTSTATUS __cdecl fallback__wine_unix_call( unixlib_handle_t handle, unsig return STATUS_DLL_NOT_FOUND; }
+static NTSTATUS __cdecl fallback__wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) +{ + return STATUS_DLL_NOT_FOUND; +} + NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) { LOAD_FUNC( __wine_unix_call ); return p__wine_unix_call( handle, code, args ); }
+NTSTATUS WINAPI __wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) +{ + LOAD_FUNC( __wine_unix_call_fast ); + return p__wine_unix_call_fast( handle, code, args ); +} + #endif /* __WINE_PE_BUILD */ diff --git a/include/wine/unixlib.h b/include/wine/unixlib.h index ef60b32184c..581790516a5 100644 --- a/include/wine/unixlib.h +++ b/include/wine/unixlib.h @@ -25,6 +25,7 @@ typedef NTSTATUS (*unixlib_entry_t)( void *args ); typedef UINT64 unixlib_handle_t;
extern NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ); +extern NTSTATUS WINAPI __wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args );
#ifdef WINE_UNIX_LIB
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/opengl32/make_opengl | 3 --- dlls/opengl32/private.h | 5 +++++ dlls/opengl32/unixlib.h | 3 --- 3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/dlls/opengl32/make_opengl b/dlls/opengl32/make_opengl index ad9def8dcec..dbc758167a0 100755 --- a/dlls/opengl32/make_opengl +++ b/dlls/opengl32/make_opengl @@ -847,9 +847,6 @@ print OUT " GLsizei length;\n"; print OUT " const GLchar *message;\n"; print OUT "};\n\n";
-print OUT "extern unixlib_handle_t unixlib_handle DECLSPEC_HIDDEN;\n"; -print OUT "#define UNIX_CALL( func, params ) __wine_unix_call( unixlib_handle, unix_ ## func, params )\n\n"; - print OUT "#endif /* __WINE_OPENGL32_UNIXLIB_H */\n"; close OUT;
diff --git a/dlls/opengl32/private.h b/dlls/opengl32/private.h index 06a97b43987..3b06266950b 100644 --- a/dlls/opengl32/private.h +++ b/dlls/opengl32/private.h @@ -28,8 +28,13 @@ #include "winternl.h" #include "wingdi.h"
+#include "wine/unixlib.h" + extern const void *extension_procs[] DECLSPEC_HIDDEN;
extern int WINAPI wglDescribePixelFormat( HDC hdc, int ipfd, UINT cjpfd, PIXELFORMATDESCRIPTOR *ppfd );
+extern unixlib_handle_t unixlib_handle DECLSPEC_HIDDEN; +#define UNIX_CALL( func, params ) __wine_unix_call_fast( unixlib_handle, unix_ ## func, params ) + #endif /* __WINE_OPENGL32_PRIVATE_H */ diff --git a/dlls/opengl32/unixlib.h b/dlls/opengl32/unixlib.h index b76858a2e13..2327a3eac7b 100644 --- a/dlls/opengl32/unixlib.h +++ b/dlls/opengl32/unixlib.h @@ -25350,7 +25350,4 @@ struct wine_gl_debug_message_params const GLchar *message; };
-extern unixlib_handle_t unixlib_handle DECLSPEC_HIDDEN; -#define UNIX_CALL( func, params ) __wine_unix_call( unixlib_handle, unix_ ## func, params ) - #endif /* __WINE_OPENGL32_UNIXLIB_H */
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/ntdll.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec index 750c214ec2a..8e3e73a4432 100644 --- a/dlls/ntdll/ntdll.spec +++ b/dlls/ntdll/ntdll.spec @@ -317,7 +317,7 @@ @ stdcall -syscall NtQueryMutant(long long ptr long ptr) @ stdcall -syscall NtQueryObject(long long ptr long ptr) # @ stub NtQueryOpenSubKeys -@ stdcall -syscall NtQueryPerformanceCounter(ptr ptr) +@ stdcall -syscall -nofpu NtQueryPerformanceCounter(ptr ptr) # @ stub NtQueryPortInformationProcess # @ stub NtQueryQuotaInformationFile @ stdcall -syscall NtQuerySection(long long ptr long ptr) @@ -449,7 +449,7 @@ @ stdcall -syscall NtWriteFileGather(long long ptr ptr ptr ptr long ptr ptr) # @ stub NtWriteRequestData @ stdcall -syscall NtWriteVirtualMemory(long ptr ptr long ptr) -@ stdcall -syscall NtYieldExecution() +@ stdcall -syscall -nofpu NtYieldExecution() @ stub PfxFindPrefix @ stub PfxInitialize @ stub PfxInsertPrefix
From: Rémi Bernon rbernon@codeweavers.com
Use the signal context to update the volatile FPU state stored in the syscall frame, assuming that it hasn't been modified by the unix call. --- dlls/ntdll/unix/signal_i386.c | 40 +++++++++++++++++++++++ dlls/ntdll/unix/signal_x86_64.c | 56 +++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 91c4ec3d0eb..36a8c630f11 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -493,6 +493,7 @@ C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, sysca #define SYSCALL_HAVE_XSAVE 1 #define SYSCALL_HAVE_XSAVEC 2 #define SYSCALL_HAVE_FXSAVE 4 +#define SYSCALL_NEED_XSTATE 8
static unsigned int syscall_flags;
@@ -741,6 +742,38 @@ static inline void restore_fpu( const CONTEXT *context ) }
+/*********************************************************************** + * update_frame_context + * + * Update current syscall frame context from a sigcontext. + */ +static void update_frame_context( const ucontext_t *sigcontext ) +{ + struct syscall_frame *frame = x86_thread_data()->syscall_frame; + + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + FLOATING_SAVE_AREA *fpu = FPU_sig(sigcontext); + XSAVE_FORMAT *fpux = FPUX_sig(sigcontext); + + if (fpu) fpu_to_fpux( &frame->u.xsave, fpu ); + if (fpux) + { + XSTATE *xs; + if (!fpu) frame->u.xsave = *fpux; + if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(fpux))) + frame->xstate = *xs; + } + if (!fpu && !fpux) + { + CONTEXT context; + save_fpu( &context ); + fpu_to_fpux( &frame->u.xsave, &context.FloatSave ); + } + } +} + + /*********************************************************************** * save_context * @@ -1595,6 +1628,7 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "movl %ebp,0x380(%esp)\n\t" "movl 0x1f8(%edx),%ecx\n\t" /* x86_thread_data()->syscall_frame */ "movl (%ecx),%eax\n\t" /* frame->syscall_flags */ + "andl $~8,%eax\n\t" /* syscall_flags &= ~SYSCALL_NEED_XSTATE */ "movl %eax,(%esp)\n\t" "movl 0x38(%ecx),%eax\n\t" /* frame->syscall_table */ "movl %eax,0x38(%esp)\n\t" @@ -2077,6 +2111,7 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) DECLSPEC_ALIGN(64) XSTATE xs; xcontext.c.ContextFlags = CONTEXT_FULL; context_init_xstate( &xcontext.c, &xs ); + update_frame_context( sigcontext );
NtGetContextThread( GetCurrentThread(), &xcontext.c ); wait_suspend( &xcontext.c ); @@ -2570,6 +2605,10 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "shrl $8,%ebx\n\t" "andl $0x30,%ebx\n\t" /* syscall table number */ "addl 0x38(%ecx),%ebx\n\t" /* frame->syscall_table */ + "testl $0x80000000,%eax\n\t" /* syscall id bit 31: -nofpu entry point */ + "jz .L__wine_syscall_dispatcher_save_fpu\n\t" + "orl $8,(%ecx)\n\t" /* frame->syscall_flags |= SYSCALL_NEED_XSTATE */ + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t"
"\n.L__wine_syscall_dispatcher_save_fpu:\n\t" "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ @@ -2657,6 +2696,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "fwait\n\t"
"\n.L__wine_syscall_dispatcher_fpu_restored:\n\t" + "andl $~8,0(%esp)\n\t" /* frame->syscall_flags &= ~SYSCALL_NEED_XSTATE */ "movl 0x2c(%esp),%edi\n\t" __ASM_CFI(".cfi_remember_state\n\t") __ASM_CFI(".cfi_same_value %edi\n\t") diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index e98c0299b80..b7050bcd0ff 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -364,6 +364,7 @@ C_ASSERT( sizeof(struct stack_layout) == 0x590 ); /* Should match the size in ca #define SYSCALL_HAVE_XSAVEC 2 #define SYSCALL_HAVE_PTHREAD_TEB 4 #define SYSCALL_HAVE_WRFSGSBASE 8 +#define SYSCALL_NEED_XSTATE 0x10
static unsigned int syscall_flags;
@@ -823,6 +824,30 @@ static inline void leave_handler( const ucontext_t *sigcontext ) }
+/*********************************************************************** + * update_frame_context + * + * Update current syscall frame context from a sigcontext. + */ +static void update_frame_context( const ucontext_t *sigcontext ) +{ + struct syscall_frame *frame = amd64_thread_data()->syscall_frame; + + if ((frame->syscall_flags & SYSCALL_NEED_XSTATE) && FPU_sig(sigcontext)) + { + M128A tmp[10]; + XSTATE *xs; + + memcpy( &tmp, frame->xsave.XmmRegisters + 6, sizeof(tmp) ); + frame->xsave = *FPU_sig(sigcontext); + memcpy( frame->xsave.XmmRegisters + 6, &tmp, sizeof(tmp) ); + + if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + frame->xstate = *xs; + } +} + + /*********************************************************************** * save_context * @@ -1609,6 +1634,7 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "movq 0xa8(%r10),%rax\n\t" /* prev_frame->syscall_table */ "movq %rax,0xa8(%rsp)\n\t" /* frame->syscall_table */ "movl 0xb0(%r10),%r14d\n\t" /* prev_frame->syscall_flags */ + "andl $~0x10,%r14d\n\t" /* syscall_flags &= ~SYSCALL_NEED_XSTATE */ "movl %r14d,0xb0(%rsp)\n\t" /* frame->syscall_flags */ "movq %r10,0xa0(%rsp)\n\t" /* frame->prev_frame */ "movq %rsp,0x328(%r11)\n\t" /* amd64_thread_data()->syscall_frame */ @@ -2152,6 +2178,7 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *ucontext ) DECLSPEC_ALIGN(64) XSTATE xs; context.c.ContextFlags = CONTEXT_FULL; context_init_xstate( &context.c, &xs ); + update_frame_context( ucontext );
NtGetContextThread( GetCurrentThread(), &context.c ); wait_suspend( &context.c ); @@ -2648,6 +2675,21 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, * depends on us returning to it. Adjust the return address accordingly. */ "subq $0xb,0x70(%rcx)\n\t" "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ + "testl $0x80000000,%eax\n\t" /* syscall id bit 31: -nofpu entry point */ + "jz .L__wine_syscall_dispatcher_save_fpu\n\t" + "orl $0x10,%r14d\n\t" + "movl %r14d,0xb0(%rcx)\n\t" /* frame->syscall_flags |= SYSCALL_NEED_XSTATE */ + "movdqa %xmm6,0x1c0(%rcx)\n\t" + "movdqa %xmm7,0x1d0(%rcx)\n\t" + "movdqa %xmm8,0x1e0(%rcx)\n\t" + "movdqa %xmm9,0x1f0(%rcx)\n\t" + "movdqa %xmm10,0x200(%rcx)\n\t" + "movdqa %xmm11,0x210(%rcx)\n\t" + "movdqa %xmm12,0x220(%rcx)\n\t" + "movdqa %xmm13,0x230(%rcx)\n\t" + "movdqa %xmm14,0x240(%rcx)\n\t" + "movdqa %xmm15,0x250(%rcx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t"
"\n.L__wine_syscall_dispatcher_save_fpu:\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ @@ -2742,7 +2784,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "\n.L__wine_syscall_dispatcher_fsgs_restored:\n\t" #endif "testl $0x48,%edx\n\t" /* CONTEXT_FLOATING_POINT | CONTEXT_XSTATE */ + "jnz .L__wine_syscall_dispatcher_restore_fpu\n\t" + "testl $0x10,%r14d\n\t" /* SYSCALL_NEED_XSTATE */ "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" + "movdqa 0x1c0(%rcx),%xmm6\n\t" + "movdqa 0x1d0(%rcx),%xmm7\n\t" + "movdqa 0x1e0(%rcx),%xmm8\n\t" + "movdqa 0x1f0(%rcx),%xmm9\n\t" + "movdqa 0x200(%rcx),%xmm10\n\t" + "movdqa 0x210(%rcx),%xmm11\n\t" + "movdqa 0x220(%rcx),%xmm12\n\t" + "movdqa 0x230(%rcx),%xmm13\n\t" + "movdqa 0x240(%rcx),%xmm14\n\t" + "movdqa 0x250(%rcx),%xmm15\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t"
"\n.L__wine_syscall_dispatcher_restore_fpu:\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ @@ -2758,6 +2813,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "fxrstor64 0xc0(%rcx)\n\t"
"\n.L__wine_syscall_dispatcher_fpu_restored:\n\t" + "andl $~0x10,0xb0(%rcx)\n\t" /* frame->syscall_flags &= ~SYSCALL_NEED_XSTATE */ "movq 0x98(%rcx),%rbp\n\t" __ASM_CFI(".cfi_same_value rbp\n\t") "movq 0x68(%rcx),%r15\n\t"
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/unix/signal_x86_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index b7050bcd0ff..cb12793466e 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2632,8 +2632,7 @@ __ASM_GLOBAL_FUNC( signal_exit_thread, * __wine_syscall_dispatcher */ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, - "movq %gs:0x30,%rcx\n\t" - "movq 0x328(%rcx),%rcx\n\t" /* amd64_thread_data()->syscall_frame */ + "movq %gs:0x328,%rcx\n\t" /* amd64_thread_data()->syscall_frame */ "popq 0x70(%rcx)\n\t" /* frame->rip */ __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") __ASM_CFI_REG_IS_AT2(rip, rcx, 0xf0,0x00)
From: Rémi Bernon rbernon@codeweavers.com
This improves performance of unix calls a little by removing one level of indirection and a branch. --- dlls/win32u/Makefile.in | 2 +- dlls/winecrt0/unix_lib.c | 45 ++++++++++++++++++++-------------------- include/wine/unixlib.h | 5 +++++ 3 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/dlls/win32u/Makefile.in b/dlls/win32u/Makefile.in index 692b2d5c9f2..ba6915d91d8 100644 --- a/dlls/win32u/Makefile.in +++ b/dlls/win32u/Makefile.in @@ -2,7 +2,7 @@ EXTRADEFS = -DWINE_NO_LONG_TYPES MODULE = win32u.dll UNIXLIB = win32u.so IMPORTLIB = win32u -IMPORTS = ntdll winecrt0 +IMPORTS = winecrt0 ntdll UNIX_CFLAGS = $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) UNIX_LIBS = $(CARBON_LIBS) $(APPKIT_LIBS) $(PTHREAD_LIBS) -lm
diff --git a/dlls/winecrt0/unix_lib.c b/dlls/winecrt0/unix_lib.c index 816be88edcc..9f7518ee314 100644 --- a/dlls/winecrt0/unix_lib.c +++ b/dlls/winecrt0/unix_lib.c @@ -29,40 +29,41 @@ #include "winternl.h" #include "wine/unixlib.h"
-static NTSTATUS (WINAPI *p__wine_unix_call)( unixlib_handle_t, unsigned int, void * ); -static NTSTATUS (WINAPI *p__wine_unix_call_fast)( unixlib_handle_t, unsigned int, void * ); - -static void load_func( void **func, const char *name, void *def ) +static void *load_func( const char *name ) { - if (!*func) - { - HMODULE module = GetModuleHandleW( L"ntdll.dll" ); - void *proc = GetProcAddress( module, name ); - InterlockedExchangePointer( func, proc ? proc : def ); - } -} -#define LOAD_FUNC(name) load_func( (void **)&p ## name, #name, fallback ## name ) + UNICODE_STRING ntdll_str; + ANSI_STRING name_str; + HMODULE module; + void *proc;
-static NTSTATUS __cdecl fallback__wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) -{ - return STATUS_DLL_NOT_FOUND; + RtlInitUnicodeString( &ntdll_str, L"ntdll.dll" ); + RtlInitAnsiString( &name_str, name ); + + if (LdrGetDllHandleEx( LDR_GET_DLL_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, NULL, &ntdll_str, &module )) return NULL; + if (LdrGetProcedureAddress( module, &name_str, 0, &proc )) return NULL; + return proc; }
-static NTSTATUS __cdecl fallback__wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) +static NTSTATUS WINAPI fallback___wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) { return STATUS_DLL_NOT_FOUND; }
-NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) +static NTSTATUS WINAPI delayload___wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) { - LOAD_FUNC( __wine_unix_call ); - return p__wine_unix_call( handle, code, args ); + void *proc = load_func( "__wine_unix_call" ); + InterlockedExchangePointer( (void *)&__wine_unix_call, proc ? proc : (void *)fallback___wine_unix_call ); + return __wine_unix_call( handle, code, args ); }
-NTSTATUS WINAPI __wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) +static NTSTATUS WINAPI delayload___wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) { - LOAD_FUNC( __wine_unix_call_fast ); - return p__wine_unix_call_fast( handle, code, args ); + void *proc = load_func( "__wine_unix_call_fast" ); + InterlockedExchangePointer( (void *)&__wine_unix_call_fast, proc ? proc : (void *)fallback___wine_unix_call ); + return __wine_unix_call_fast( handle, code, args ); }
+NTSTATUS (WINAPI *__wine_unix_call)( unixlib_handle_t, unsigned int, void * ) = delayload___wine_unix_call; +NTSTATUS (WINAPI *__wine_unix_call_fast)( unixlib_handle_t, unsigned int, void * ) = delayload___wine_unix_call_fast; + #endif /* __WINE_PE_BUILD */ diff --git a/include/wine/unixlib.h b/include/wine/unixlib.h index 581790516a5..86372ad25d3 100644 --- a/include/wine/unixlib.h +++ b/include/wine/unixlib.h @@ -24,8 +24,13 @@ typedef NTSTATUS (*unixlib_entry_t)( void *args ); typedef UINT64 unixlib_handle_t;
+#if defined(WINE_UNIX_LIB) || defined(_NTSYSTEM_) extern NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ); extern NTSTATUS WINAPI __wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ); +#else +extern NTSTATUS (WINAPI *__wine_unix_call)( unixlib_handle_t handle, unsigned int code, void *args ); +extern NTSTATUS (WINAPI *__wine_unix_call_fast)( unixlib_handle_t handle, unsigned int code, void *args ); +#endif
#ifdef WINE_UNIX_LIB
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 14 ++++++++++++++ dlls/ntdll/unix/signal_x86_64.c | 14 ++++++++++++++ tools/winebuild/import.c | 1 + 3 files changed, 29 insertions(+)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 36a8c630f11..b63999f23a0 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2608,6 +2608,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "testl $0x80000000,%eax\n\t" /* syscall id bit 31: -nofpu entry point */ "jz .L__wine_syscall_dispatcher_save_fpu\n\t" "orl $8,(%ecx)\n\t" /* frame->syscall_flags |= SYSCALL_NEED_XSTATE */ + "testl $0x40000000,%eax\n\t" /* __wine_unix_call_fast */ + "jnz .L__wine_syscall_dispatcher_unix_call_fast\n\t" "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t"
"\n.L__wine_syscall_dispatcher_save_fpu:\n\t" @@ -2664,6 +2666,18 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "rep; movsl\n\t" "call *(%eax,%edx,4)\n\t" "leal -0x34(%ebp),%esp\n\t" + "jmp .L__wine_syscall_dispatcher_restore\n\t" + + "\n.L__wine_syscall_dispatcher_unix_call_fast:\n\t" + "movl %ecx,%esp\n\t" + "subl $4,%esp\n\t" + "andl $~15,%esp\n\t" + "movl 0xc(%esi),%eax\n\t" + "movl %eax,(%esp)\n\t" + "movl 0x8(%esi),%eax\n\t" + "movl (%esi),%edx\n\t" + "call *(%edx,%eax,4)\n\t" + "leal -0x34(%ebp),%esp\n"
"\n.L__wine_syscall_dispatcher_restore:\n\t" __ASM_CFI_CFA_IS_AT1(esp, 0x0c) diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index cb12793466e..b4980f58f35 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2714,6 +2714,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "fxsave64 0xc0(%rcx)\n\t"
"\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" + /* remember state when $rcx is pointing to "frame" */ + __ASM_CFI(".cfi_remember_state\n\t") /* remember state when $rcx is pointing to "frame" */ __ASM_CFI(".cfi_remember_state\n\t") "leaq 0x98(%rcx),%rbp\n\t" @@ -2747,6 +2749,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq %rcx,%rsp\n\t" "movq 0x00(%rcx),%rax\n\t" "movq 0x18(%rcx),%rdx\n\t" + "testl $0x40000000,%eax\n\t" /* __wine_unix_call_fast */ + "jnz .L__wine_syscall_dispatcher_unix_call_fast\n\t" "movl %eax,%ebx\n\t" "shrl $8,%ebx\n\t" "andl $0x30,%ebx\n\t" /* syscall table number */ @@ -2773,6 +2777,16 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "leaq -0x98(%rbp),%rcx\n\t" /* $rcx is now pointing to "frame" again */ __ASM_CFI(".cfi_restore_state\n\t") + "jmp .L__wine_syscall_dispatcher_restore\n" + + "\n.L__wine_syscall_dispatcher_unix_call_fast:\n\t" + "movq %r8,%rdi\n\t" + "subq $0x20,%rsp\n\t" + "andq $~15,%rsp\n\t" + "callq *(%r10,%rdx,8)\n\t" + "leaq -0x98(%rbp),%rcx\n\t" + /* $rcx is now pointing to "frame" again */ + __ASM_CFI(".cfi_restore_state\n\t")
"\n.L__wine_syscall_dispatcher_restore:\n\t" "movl 0x94(%rcx),%edx\n\t" /* frame->restore_flags */ diff --git a/tools/winebuild/import.c b/tools/winebuild/import.c index 18ca6eaef01..b504834063b 100644 --- a/tools/winebuild/import.c +++ b/tools/winebuild/import.c @@ -1386,6 +1386,7 @@ void output_syscalls( DLLSPEC *spec ) const char *name = get_link_name(odp); unsigned int id = (spec->syscall_table << 12) + i; if (odp->flags & FLAG_NOFPU) id |= (1u << 31); + if (strstr( name, "__wine_unix_call_fast" )) id |= (1u << 30);
output( "\t.align %d\n", get_alignment(16) ); output( "\t%s\n", func_declaration(name) );
I think something like that could work, although there are a few things I'm still not completely sure about:
Should we actually do it that way with a -nofpu flag enabled selectively, or rather do it the other way, a bit more like the Linux kernel does it with a -fpusave flag for syscalls where it matters?
I also used the high bits of the syscall table number to store some information; it's maybe not ideal, but it is very convenient. Another possibility would be to have a dedicated service table for fast calls.
Is the suspend usr1_handler the only place where we might be missing some context bits? And is it okay to use the signal context for the missing pieces, or should we rather zero the state?
Could we extend the mechanism and use the signal context to get more registers unlikely to change, like some of the segment registers?
I replaced `mov %gs:0x30,%rcx; mov 0x328(%rcx),%rcx` with `mov %gs:0x328,%rcx`, and it works fine, though I don't know why this double indirection was there in the first place. It is choking the CPU on entry.
What can we do about rflags? Skipping it entirely helps a bit more, but it doesn't seem right.
> I replaced `mov %gs:0x30,%rcx; mov 0x328(%rcx),%rcx` with `mov %gs:0x328,%rcx`, and it works fine though I don't know why there was this double indirection? It is choking the CPU on entry.
That's needed for macOS. It will be removed when we implement %gs switching on syscall entry/exit.