-- v2: ntdll: Check non-volatile rflags and skip popfq when possible. ntdll: Use -nofpu for NtQueryPerformanceCounter syscall. ntdll: Inline __wine_unix_call_fast in the dispatcher. winecrt0: Inline PE __wine_unix_call(_fast) function calls. ntdll: Only save non-volatile FPU registers for -nofpu syscalls. opengl32: Use __wine_unix_call_fast instead of __wine_unix_call. ntdll: Introduce a new __wine_unix_call_fast syscall. winebuild: Introduce a new -nofpu syscall spec flag. ntdll: Use named labels for jumps in the syscall dispatcher.
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 81 ++++++++++++++++++-------------- dlls/ntdll/unix/signal_x86_64.c | 82 +++++++++++++++++++-------------- 2 files changed, 93 insertions(+), 70 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index cc8605c2a4f..f2c9982ce0f 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2571,7 +2571,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "andl $0x30,%ebx\n\t" /* syscall table number */ "addl 0x38(%ecx),%ebx\n\t" /* frame->syscall_table */ "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ - "jz 2f\n\t" + "jz .L__wine_syscall_dispatcher_no_xsave\n\t" "movl $7,%eax\n\t" "xorl %edx,%edx\n\t" "movl %edx,0x240(%ecx)\n\t" @@ -2581,7 +2581,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl %edx,0x250(%ecx)\n\t" "movl %edx,0x254(%ecx)\n\t" "testl $2,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_XSAVEC */ - "jz 1f\n\t" + "jz .L__wine_syscall_dispatcher_no_xsavec\n\t" "movl %edx,0x258(%ecx)\n\t" "movl %edx,0x25c(%ecx)\n\t" "movl %edx,0x260(%ecx)\n\t" @@ -2593,20 +2593,24 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl %edx,0x278(%ecx)\n\t" "movl %edx,0x27c(%ecx)\n\t" "xsavec 0x40(%ecx)\n\t" - "jmp 4f\n" - "1:\txsave 0x40(%ecx)\n\t" - "jmp 4f\n" - "2:\ttestl $4,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_FXSAVE */ - "jz 3f\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_xsavec:\n\t" + "xsave 0x40(%ecx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_xsave:\n\t" + "testl $4,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_FXSAVE */ + "jz .L__wine_syscall_dispatcher_no_fxsave\n\t" "fxsave 0x40(%ecx)\n\t" - "jmp 4f\n" - "3:\tfnsave 0x40(%ecx)\n\t" - "fwait\n" - "4:\tmovl %ecx,%esp\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_fxsave:\n\t" + "fnsave 0x40(%ecx)\n\t" + "fwait\n\t" + "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" + "movl %ecx,%esp\n\t" "movl 0x1c(%esp),%edx\n\t" /* frame->eax */ "andl $0xfff,%edx\n\t" /* syscall number */ "cmpl 8(%ebx),%edx\n\t" /* table->ServiceLimit */ - "jae 6f\n\t" + "jae .L__wine_syscall_dispatcher_invalid_arg\n\t" "movl 12(%ebx),%eax\n\t" /* table->ArgumentTable */ "movzbl (%eax,%edx,1),%ecx\n\t" "movl (%ebx),%eax\n\t" /* table->ServiceTable */ @@ -2617,8 +2621,9 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "cld\n\t" "rep; movsl\n\t" "call *(%eax,%edx,4)\n\t" - "leal -0x34(%ebp),%esp\n" - "5:\t" + "leal -0x34(%ebp),%esp\n\t" + "\n.L__wine_syscall_dispatcher_restore:\n\t" + "" __ASM_CFI_CFA_IS_AT1(esp, 0x0c) __ASM_CFI_REG_IS_AT1(esp, esp, 0x0c) __ASM_CFI_REG_IS_AT1(eip, esp, 0x08) @@ -2628,22 +2633,25 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI_REG_IS_AT1(ebp, esp, 0x34) "movl 0(%esp),%ecx\n\t" /* frame->syscall_flags + (frame->restore_flags << 16) */ "testl $0x68 << 16,%ecx\n\t" /* CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS | CONTEXT_XSAVE */ - "jz 3f\n\t" + "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" "testl $3,%ecx\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ - "jz 1f\n\t" + "jz .L__wine_syscall_dispatcher_no_xrstor\n\t" "movl %eax,%esi\n\t" "movl $7,%eax\n\t" "xorl %edx,%edx\n\t" "xrstor 0x40(%esp)\n\t" "movl %esi,%eax\n\t" - "jmp 3f\n" - "1:\ttestl $4,%ecx\n\t" /* SYSCALL_HAVE_FXSAVE */ - "jz 2f\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" + "\n.L__wine_syscall_dispatcher_no_xrstor:\n\t" + "testl $4,%ecx\n\t" /* SYSCALL_HAVE_FXSAVE */ + "jz .L__wine_syscall_dispatcher_no_fxrstor\n\t" "fxrstor 0x40(%esp)\n\t" - "jmp 3f\n" - "2:\tfrstor 0x40(%esp)\n\t" - "fwait\n" - "3:\tmovl 0x2c(%esp),%edi\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" + "\n.L__wine_syscall_dispatcher_no_fxrstor:\n\t" + "frstor 0x40(%esp)\n\t" + "fwait\n\t" + "\n.L__wine_syscall_dispatcher_fpu_restored:\n\t" + "movl 0x2c(%esp),%edi\n\t" __ASM_CFI(".cfi_remember_state\n\t") __ASM_CFI(".cfi_same_value %edi\n\t") "movl 0x30(%esp),%esi\n\t" @@ -2651,7 +2659,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl 0x34(%esp),%ebp\n\t" __ASM_CFI(".cfi_same_value %ebp\n\t") "testl $0x7 << 16,%ecx\n\t" /* CONTEXT_CONTROL | CONTEXT_SEGMENTS | CONTEXT_INTEGER */ - "jnz 1f\n\t" + "jnz .L__wine_syscall_dispatcher_integer_restore\n\t" "movl 0x20(%esp),%ebx\n\t" __ASM_CFI(".cfi_remember_state\n\t") __ASM_CFI(".cfi_same_value %ebx\n\t") @@ -2659,14 +2667,16 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI(".cfi_register %eip, %ecx\n\t") "movl 0x0c(%esp),%esp\n\t" /* frame->esp */ __ASM_CFI(".cfi_same_value %esp\n\t") - "jmpl *%ecx\n" - __ASM_CFI("\t.cfi_restore_state\n") - "1:\ttestl $0x2 << 16,%ecx\n\t" /* CONTEXT_INTEGER */ - "jz 1f\n\t" + "jmpl *%ecx\n\t" + __ASM_CFI("\t.cfi_restore_state\n\t") + "\n.L__wine_syscall_dispatcher_integer_restore:\n\t" + "testl $0x2 << 16,%ecx\n\t" /* CONTEXT_INTEGER */ + "jz .L__wine_syscall_dispatcher_control_restore\n\t" "movl 0x1c(%esp),%eax\n\t" "movl 0x24(%esp),%ecx\n\t" - "movl 0x28(%esp),%edx\n" - "1:\tmovl 0x0c(%esp),%ebx\n\t" /* frame->esp */ + "movl 0x28(%esp),%edx\n\t" + "\n.L__wine_syscall_dispatcher_control_restore:\n\t" + "movl 0x0c(%esp),%ebx\n\t" /* frame->esp */ __ASM_CFI(".cfi_register %esp, %ebx\n\t") "movw 0x12(%esp),%ss\n\t" "xchgl %ebx,%esp\n\t" @@ -2690,10 +2700,11 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI(".cfi_same_value %ebx\n\t") "popl %ds\n\t" __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") - "iret\n" - __ASM_CFI("\t.cfi_restore_state\n") - "6:\tmovl $0xc000000d,%eax\n\t" /* STATUS_INVALID_PARAMETER */ - "jmp 5b\n\t" + "iret\n\t" + __ASM_CFI("\t.cfi_restore_state\n\t") + "\n.L__wine_syscall_dispatcher_invalid_arg:\n\t" + "movl $0xc000000d,%eax\n\t" /* STATUS_INVALID_PARAMETER */ + "jmp .L__wine_syscall_dispatcher_restore\n\t" ".globl " __ASM_NAME("__wine_syscall_dispatcher_return") "\n" __ASM_NAME("__wine_syscall_dispatcher_return") ":\n\t" __ASM_CFI(".cfi_remember_state\n\t") @@ -2707,7 +2718,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl 8(%esp),%eax\n\t" "movl 4(%esp),%esp\n\t" __ASM_CFI(".cfi_restore_state\n\t") - "jmp 5b" ) + "jmp .L__wine_syscall_dispatcher_restore\n\t" )
/*********************************************************************** diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index cc070dda5ae..eeba97d5de9 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2649,27 +2649,30 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "subq $0xb,0x70(%rcx)\n\t" "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ - "jz 2f\n\t" + "jz .L__wine_syscall_dispatcher_no_xsave\n\t" "movl $7,%eax\n\t" "xorl %edx,%edx\n\t" "movq %rdx,0x2c0(%rcx)\n\t" "movq %rdx,0x2c8(%rcx)\n\t" "movq %rdx,0x2d0(%rcx)\n\t" "testl $2,%r14d\n\t" /* SYSCALL_HAVE_XSAVEC */ - "jz 1f\n\t" + "jz .L__wine_syscall_dispatcher_no_xsavec\n\t" "movq %rdx,0x2d8(%rcx)\n\t" "movq %rdx,0x2e0(%rcx)\n\t" "movq %rdx,0x2e8(%rcx)\n\t" "movq %rdx,0x2f0(%rcx)\n\t" "movq %rdx,0x2f8(%rcx)\n\t" "xsavec64 0xc0(%rcx)\n\t" - "jmp 3f\n" - "1:\txsave64 0xc0(%rcx)\n\t" - "jmp 3f\n" - "2:\tfxsave64 0xc0(%rcx)\n" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_xsavec:\n\t" + "xsave64 0xc0(%rcx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_xsave:\n\t" + "fxsave64 0xc0(%rcx)\n\t" + "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" /* remember state when $rcx is pointing to "frame" */ __ASM_CFI(".cfi_remember_state\n\t") - "3:\tleaq 0x98(%rcx),%rbp\n\t" + "leaq 0x98(%rcx),%rbp\n\t" __ASM_CFI_CFA_IS_AT1(rbp, 0x70) __ASM_CFI_REG_IS_AT1(rsp, rbp, 0x70) __ASM_CFI_REG_IS_AT1(rip, rbp, 0x58) @@ -2683,17 +2686,18 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI_REG_IS_AT1(rbp, rbp, 0x00) #ifdef __linux__ "testl $12,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB | SYSCALL_HAVE_WRFSGSBASE */ - "jz 2f\n\t" + "jz .L__wine_syscall_dispatcher_fsgs_saved\n\t" "movq %gs:0x330,%rsi\n\t" /* amd64_thread_data()->pthread_teb */ "testl $8,%r14d\n\t" /* SYSCALL_HAVE_WRFSGSBASE */ - "jz 1f\n\t" + "jz .L__wine_syscall_dispatcher_no_wrfsbase\n\t" "wrfsbase %rsi\n\t" - "jmp 2f\n" - "1:\tmov $0x1002,%edi\n\t" /* ARCH_SET_FS */ + "jmp .L__wine_syscall_dispatcher_fsgs_saved\n\t" + "\n.L__wine_syscall_dispatcher_no_wrfsbase:\n\t" + "mov $0x1002,%edi\n\t" /* ARCH_SET_FS */ "mov $158,%eax\n\t" /* SYS_arch_prctl */ "syscall\n\t" - "leaq -0x98(%rbp),%rcx\n" - "2:\n\t" + "leaq -0x98(%rbp),%rcx\n\t" + "\n.L__wine_syscall_dispatcher_fsgs_saved:\n\t" #endif "leaq 0x28(%rsp),%rsi\n\t" /* first argument */ "movq %rcx,%rsp\n\t" @@ -2706,44 +2710,49 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "leaq (%rcx,%rbx,2),%rbx\n\t" "andl $0xfff,%eax\n\t" /* syscall number */ "cmpq 16(%rbx),%rax\n\t" /* table->ServiceLimit */ - "jae 5f\n\t" + "jae .L__wine_syscall_dispatcher_invalid_arg\n\t" "movq 24(%rbx),%rcx\n\t" /* table->ArgumentTable */ "movzbl (%rcx,%rax),%ecx\n\t" "subq $0x20,%rcx\n\t" - "jbe 1f\n\t" + "jbe .L__wine_syscall_dispatcher_args_copied\n\t" "subq %rcx,%rsp\n\t" "shrq $3,%rcx\n\t" "andq $~15,%rsp\n\t" "movq %rsp,%rdi\n\t" "cld\n\t" - "rep; movsq\n" - "1:\tmovq %r10,%rcx\n\t" + "rep; movsq\n\t" + "\n.L__wine_syscall_dispatcher_args_copied:\n\t" + "movq %r10,%rcx\n\t" "subq $0x20,%rsp\n\t" "movq (%rbx),%r10\n\t" /* table->ServiceTable */ "callq *(%r10,%rax,8)\n\t" - "leaq -0x98(%rbp),%rcx\n" + "leaq -0x98(%rbp),%rcx\n\t" /* $rcx is now pointing to "frame" again */ __ASM_CFI(".cfi_restore_state\n\t") - "2:\tmovl 0x94(%rcx),%edx\n\t" /* frame->restore_flags */ + "\n.L__wine_syscall_dispatcher_restore:\n\t" + "movl 0x94(%rcx),%edx\n\t" /* frame->restore_flags */ #ifdef __linux__ "testl $12,%r14d\n\t" /* SYSCALL_HAVE_PTHREAD_TEB | SYSCALL_HAVE_WRFSGSBASE */ - "jz 1f\n\t" - "movw 0x7e(%rcx),%fs\n" + "jz .L__wine_syscall_dispatcher_fsgs_restored\n\t" + "movw 0x7e(%rcx),%fs\n\t" + "\n.L__wine_syscall_dispatcher_fsgs_restored:\n\t" "1:\n\t" #endif "testl $0x48,%edx\n\t" /* CONTEXT_FLOATING_POINT | CONTEXT_XSTATE */ - "jz 4f\n\t" + "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ - "jz 3f\n\t" + "jz .L__wine_syscall_dispatcher_no_xrstor\n\t" "movq %rax,%r11\n\t" "movl $7,%eax\n\t" "xorl %edx,%edx\n\t" "xrstor64 0xc0(%rcx)\n\t" "movq %r11,%rax\n\t" "movl 0x94(%rcx),%edx\n\t" - "jmp 4f\n" - "3:\tfxrstor64 0xc0(%rcx)\n" - "4:\tmovq 0x98(%rcx),%rbp\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" + "\n.L__wine_syscall_dispatcher_no_xrstor:\n\t" + "fxrstor64 0xc0(%rcx)\n\t" + "\n.L__wine_syscall_dispatcher_fpu_restored:\n\t" + "movq 0x98(%rcx),%rbp\n\t" __ASM_CFI(".cfi_same_value rbp\n\t") "movq 0x68(%rcx),%r15\n\t" __ASM_CFI(".cfi_same_value r15\n\t") @@ -2760,7 +2769,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq 0x08(%rcx),%rbx\n\t" __ASM_CFI(".cfi_same_value rbx\n\t") "testl $0x3,%edx\n\t" /* CONTEXT_CONTROL | CONTEXT_INTEGER */ - "jnz 1f\n\t" + "jnz .L__wine_syscall_dispatcher_control_restore\n\t" __ASM_CFI(".cfi_remember_state\n\t") "movq 0x80(%rcx),%r11\n\t" /* frame->eflags */ "pushq %r11\n\t" @@ -2775,25 +2784,27 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI(".cfi_restore_state\n\t") /* remember state when $rcx is pointing to "frame" */ __ASM_CFI(".cfi_remember_state\n\t") - "1:\tleaq 0x70(%rcx),%rsp\n\t" + "\n.L__wine_syscall_dispatcher_control_restore:\n\t" + "leaq 0x70(%rcx),%rsp\n\t" __ASM_CFI_CFA_IS_AT1(rsp, 0x18) __ASM_CFI_REG_IS_AT1(rsp, rsp, 0x18) __ASM_CFI_REG_IS_AT1(rip, rsp, 0x00) "testl $0x2,%edx\n\t" /* CONTEXT_INTEGER */ - "jnz 1f\n\t" + "jnz .L__wine_syscall_dispatcher_integer_restore\n\t" "movq 0x10(%rsp),%r11\n\t" /* frame->eflags */ "movq (%rsp),%rcx\n\t" /* frame->rip */ __ASM_CFI(".cfi_register rip, rcx\n\t") - "iretq\n" + "iretq\n\t" __ASM_CFI_REG_IS_AT1(rip, rsp, 0x00) - "1:\tmovq 0x00(%rcx),%rax\n\t" + "\n.L__wine_syscall_dispatcher_integer_restore:\n\t" + "movq 0x00(%rcx),%rax\n\t" "movq 0x18(%rcx),%rdx\n\t" "movq 0x30(%rcx),%r8\n\t" "movq 0x38(%rcx),%r9\n\t" "movq 0x40(%rcx),%r10\n\t" "movq 0x48(%rcx),%r11\n\t" - "movq 0x10(%rcx),%rcx\n" - "iretq\n" + "movq 0x10(%rcx),%rcx\n\t" + "iretq\n\t" __ASM_CFI_CFA_IS_AT1(rbp, 0x70) __ASM_CFI_REG_IS_AT1(rsp, rbp, 0x70) __ASM_CFI_REG_IS_AT1(rip, rbp, 0x58) @@ -2805,7 +2816,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_CFI_REG_IS_AT1(r14, rbp, 0x48) __ASM_CFI_REG_IS_AT1(r15, rbp, 0x50) __ASM_CFI_REG_IS_AT1(rbp, rbp, 0x00) - "5:\tmovl $0xc000000d,%edx\n\t" /* STATUS_INVALID_PARAMETER */ + "\n.L__wine_syscall_dispatcher_invalid_arg:\n\t" + "movl $0xc000000d,%edx\n\t" /* STATUS_INVALID_PARAMETER */ "movq %rsp,%rcx\n\t" /* $rcx is now pointing to "frame" again */ __ASM_CFI(".cfi_restore_state\n\t") @@ -2813,7 +2825,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, __ASM_NAME("__wine_syscall_dispatcher_return") ":\n\t" "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ "movq %rdx,%rax\n\t" - "jmp 2b" ) + "jmp .L__wine_syscall_dispatcher_restore\n\t" )
/***********************************************************************
From: Rémi Bernon rbernon@codeweavers.com
Stored in the syscall table index highest bit. --- tools/winebuild/build.h | 1 + tools/winebuild/import.c | 1 + tools/winebuild/parser.c | 6 ++++++ tools/winebuild/winebuild.man.in | 4 ++++ 4 files changed, 12 insertions(+)
diff --git a/tools/winebuild/build.h b/tools/winebuild/build.h index 576304cd2b8..216f6859fe7 100644 --- a/tools/winebuild/build.h +++ b/tools/winebuild/build.h @@ -189,6 +189,7 @@ static inline int is_pe(void) #define FLAG_FASTCALL 0x0100 /* function uses fastcall calling convention */ #define FLAG_SYSCALL 0x0200 /* function is a system call */ #define FLAG_IMPORT 0x0400 /* export is imported from another module */ +#define FLAG_NOFPU 0x0800 /* function skips default syscall FPU save / restore */
#define FLAG_FORWARD 0x1000 /* function is a forwarded name */ #define FLAG_EXT_LINK 0x2000 /* function links to an external symbol */ diff --git a/tools/winebuild/import.c b/tools/winebuild/import.c index 9f87adcf593..18ca6eaef01 100644 --- a/tools/winebuild/import.c +++ b/tools/winebuild/import.c @@ -1385,6 +1385,7 @@ void output_syscalls( DLLSPEC *spec ) ORDDEF *odp = syscalls[i]; const char *name = get_link_name(odp); unsigned int id = (spec->syscall_table << 12) + i; + if (odp->flags & FLAG_NOFPU) id |= (1u << 31);
output( "\t.align %d\n", get_alignment(16) ); output( "\t%s\n", func_declaration(name) ); diff --git a/tools/winebuild/parser.c b/tools/winebuild/parser.c index 11b422bd435..0a1fbe52e62 100644 --- a/tools/winebuild/parser.c +++ b/tools/winebuild/parser.c @@ -71,6 +71,7 @@ static const char * const FlagNames[] = "fastcall", /* FLAG_FASTCALL */ "syscall", /* FLAG_SYSCALL */ "import", /* FLAG_IMPORT */ + "nofpu", /* FLAG_NOFPU */ NULL };
@@ -328,6 +329,11 @@ static int parse_spec_arguments( ORDDEF *odp, DLLSPEC *spec, int optional ) return 0; } } + else if (odp->flags & FLAG_NOFPU) + { + error( "The nofpu flag can only be used on a syscall function\n" ); + return 0; + } return 1; }
diff --git a/tools/winebuild/winebuild.man.in b/tools/winebuild/winebuild.man.in index 23ef3495744..ce016b542fb 100644 --- a/tools/winebuild/winebuild.man.in +++ b/tools/winebuild/winebuild.man.in @@ -324,6 +324,10 @@ only). The entry point will be exported by ordinal instead of by name. The name is still available for importing. .TP +.B -nofpu +The entry point is a NT syscall which does not save or restore the +FPU state and assumes it will not be modified by the call. +.TP .B -ret16 The function returns a 16-bit value (Win16 only). .TP
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/ntdll.spec | 1 + dlls/ntdll/unix/loader.c | 1 + dlls/winecrt0/unix_lib.c | 12 ++++++++++++ include/wine/unixlib.h | 1 + 4 files changed, 15 insertions(+)
diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec index 1d4218e7b52..750c214ec2a 100644 --- a/dlls/ntdll/ntdll.spec +++ b/dlls/ntdll/ntdll.spec @@ -1690,6 +1690,7 @@ @ cdecl -syscall wine_server_handle_to_fd(long long ptr ptr)
# Unix interface +@ stdcall -syscall -nofpu __wine_unix_call_fast(int64 long ptr) @ stdcall -syscall __wine_unix_call(int64 long ptr) @ stdcall -syscall __wine_unix_spawnvp(long ptr) @ stdcall __wine_ctrl_routine(ptr) diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c index 8a6200c7454..8d9fc8f34b9 100644 --- a/dlls/ntdll/unix/loader.c +++ b/dlls/ntdll/unix/loader.c @@ -356,6 +356,7 @@ static void * const syscalls[] = NtYieldExecution, __wine_dbg_write, __wine_unix_call, + __wine_unix_call /* __wine_unix_call_fast */, __wine_unix_spawnvp, wine_nt_to_unix_file_name, wine_server_call, diff --git a/dlls/winecrt0/unix_lib.c b/dlls/winecrt0/unix_lib.c index c86897b9905..816be88edcc 100644 --- a/dlls/winecrt0/unix_lib.c +++ b/dlls/winecrt0/unix_lib.c @@ -30,6 +30,7 @@ #include "wine/unixlib.h"
static NTSTATUS (WINAPI *p__wine_unix_call)( unixlib_handle_t, unsigned int, void * ); +static NTSTATUS (WINAPI *p__wine_unix_call_fast)( unixlib_handle_t, unsigned int, void * );
static void load_func( void **func, const char *name, void *def ) { @@ -47,10 +48,21 @@ static NTSTATUS __cdecl fallback__wine_unix_call( unixlib_handle_t handle, unsig return STATUS_DLL_NOT_FOUND; }
+static NTSTATUS __cdecl fallback__wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) +{ + return STATUS_DLL_NOT_FOUND; +} + NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) { LOAD_FUNC( __wine_unix_call ); return p__wine_unix_call( handle, code, args ); }
+NTSTATUS WINAPI __wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) +{ + LOAD_FUNC( __wine_unix_call_fast ); + return p__wine_unix_call_fast( handle, code, args ); +} + #endif /* __WINE_PE_BUILD */ diff --git a/include/wine/unixlib.h b/include/wine/unixlib.h index ef60b32184c..581790516a5 100644 --- a/include/wine/unixlib.h +++ b/include/wine/unixlib.h @@ -25,6 +25,7 @@ typedef NTSTATUS (*unixlib_entry_t)( void *args ); typedef UINT64 unixlib_handle_t;
extern NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ); +extern NTSTATUS WINAPI __wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args );
#ifdef WINE_UNIX_LIB
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/opengl32/make_opengl | 3 --- dlls/opengl32/private.h | 5 +++++ dlls/opengl32/unixlib.h | 3 --- 3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/dlls/opengl32/make_opengl b/dlls/opengl32/make_opengl index ad9def8dcec..dbc758167a0 100755 --- a/dlls/opengl32/make_opengl +++ b/dlls/opengl32/make_opengl @@ -847,9 +847,6 @@ print OUT " GLsizei length;\n"; print OUT " const GLchar *message;\n"; print OUT "};\n\n";
-print OUT "extern unixlib_handle_t unixlib_handle DECLSPEC_HIDDEN;\n"; -print OUT "#define UNIX_CALL( func, params ) __wine_unix_call( unixlib_handle, unix_ ## func, params )\n\n"; - print OUT "#endif /* __WINE_OPENGL32_UNIXLIB_H */\n"; close OUT;
diff --git a/dlls/opengl32/private.h b/dlls/opengl32/private.h index 06a97b43987..3b06266950b 100644 --- a/dlls/opengl32/private.h +++ b/dlls/opengl32/private.h @@ -28,8 +28,13 @@ #include "winternl.h" #include "wingdi.h"
+#include "wine/unixlib.h" + extern const void *extension_procs[] DECLSPEC_HIDDEN;
extern int WINAPI wglDescribePixelFormat( HDC hdc, int ipfd, UINT cjpfd, PIXELFORMATDESCRIPTOR *ppfd );
+extern unixlib_handle_t unixlib_handle DECLSPEC_HIDDEN; +#define UNIX_CALL( func, params ) __wine_unix_call_fast( unixlib_handle, unix_ ## func, params ) + #endif /* __WINE_OPENGL32_PRIVATE_H */ diff --git a/dlls/opengl32/unixlib.h b/dlls/opengl32/unixlib.h index b76858a2e13..2327a3eac7b 100644 --- a/dlls/opengl32/unixlib.h +++ b/dlls/opengl32/unixlib.h @@ -25350,7 +25350,4 @@ struct wine_gl_debug_message_params const GLchar *message; };
-extern unixlib_handle_t unixlib_handle DECLSPEC_HIDDEN; -#define UNIX_CALL( func, params ) __wine_unix_call( unixlib_handle, unix_ ## func, params ) - #endif /* __WINE_OPENGL32_UNIXLIB_H */
From: Rémi Bernon rbernon@codeweavers.com
Using the signal context for the volatile FPU state, assuming that it hasn't been modified by the unix call. --- dlls/ntdll/unix/signal_i386.c | 61 ++++++++++++++++++++++++--- dlls/ntdll/unix/signal_x86_64.c | 74 ++++++++++++++++++++++++++++++--- 2 files changed, 125 insertions(+), 10 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index f2c9982ce0f..262a0b8072f 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -493,6 +493,7 @@ C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct x86_thread_data, sysca #define SYSCALL_HAVE_XSAVE 1 #define SYSCALL_HAVE_XSAVEC 2 #define SYSCALL_HAVE_FXSAVE 4 +#define SYSCALL_NEED_XSTATE 8
static unsigned int syscall_flags;
@@ -741,6 +742,36 @@ static inline void restore_fpu( const CONTEXT *context ) }
+/*********************************************************************** + * save_context_xstate + * + * Set the fltsave and xstate values from a sigcontext. + */ +static void save_context_xstate( struct xcontext *xcontext, const ucontext_t *sigcontext ) +{ + FLOATING_SAVE_AREA *fpu = FPU_sig(sigcontext); + XSAVE_FORMAT *fpux = FPUX_sig(sigcontext); + CONTEXT *context = &xcontext->c; + + if (fpu) + { + context->ContextFlags |= CONTEXT_FLOATING_POINT; + context->FloatSave = *fpu; + } + if (fpux) + { + XSTATE *xs; + + context->ContextFlags |= CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; + memcpy( context->ExtendedRegisters, fpux, sizeof(*fpux) ); + if (!fpu) fpux_to_fpu( &context->FloatSave, fpux ); + if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(fpux))) + *xstate_from_context( context ) = *xs; + } + if (!fpu && !fpux) save_fpu( context ); +} + + /*********************************************************************** * save_context * @@ -1046,7 +1077,11 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) } if (needed_flags & CONTEXT_FLOATING_POINT) { - if (!(cpu_info.ProcessorFeatureBits & CPU_FEATURE_FXSR)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + /* nothing to do, already copied from sigcontext */ + } + else if (!(cpu_info.ProcessorFeatureBits & CPU_FEATURE_FXSR)) { context->FloatSave = frame->u.fsave; } @@ -1066,8 +1101,11 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) { XSAVE_FORMAT *xs = (XSAVE_FORMAT *)context->ExtendedRegisters;
- if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + /* nothing to do, already copied from sigcontext */ + } + else if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( xs, &frame->u.xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( xs->FloatRegisters, frame->u.xsave.FloatRegisters, @@ -1080,7 +1118,12 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) xs->ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + memcpy( xs->XmmRegisters + 6, frame->u.xsave.XmmRegisters + 6, + sizeof( frame->u.xsave.XmmRegisters ) - 6 * sizeof(*frame->u.xsave.XmmRegisters) ); + } + else if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( xs->XmmRegisters, frame->u.xsave.XmmRegisters, sizeof( xs->XmmRegisters )); xs->MxCsr = frame->u.xsave.MxCsr; @@ -1095,7 +1138,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
context->ContextFlags |= CONTEXT_EXTENDED_REGISTERS; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && + !(frame->syscall_flags & SYSCALL_NEED_XSTATE)) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1595,6 +1639,7 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "movl %ebp,0x380(%esp)\n\t" "movl 0x1f8(%edx),%ecx\n\t" /* x86_thread_data()->syscall_frame */ "movl (%ecx),%eax\n\t" /* frame->syscall_flags */ + "andl $~8,%eax\n\t" /* syscall_flags &= ~SYSCALL_NEED_XSTATE */ "movl %eax,(%esp)\n\t" "movl 0x38(%ecx),%eax\n\t" /* frame->syscall_table */ "movl %eax,0x38(%esp)\n\t" @@ -2077,6 +2122,7 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) DECLSPEC_ALIGN(64) XSTATE xs; xcontext.c.ContextFlags = CONTEXT_FULL; context_init_xstate( &xcontext.c, &xs ); + save_context_xstate( &xcontext, sigcontext );
NtGetContextThread( GetCurrentThread(), &xcontext.c ); wait_suspend( &xcontext.c ); @@ -2570,6 +2616,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "shrl $8,%ebx\n\t" "andl $0x30,%ebx\n\t" /* syscall table number */ "addl 0x38(%ecx),%ebx\n\t" /* frame->syscall_table */ + "testl $0x80000000,%eax\n\t" + "jnz .L__wine_syscall_dispatcher_nofpu_save\n\t" "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ "jz .L__wine_syscall_dispatcher_no_xsave\n\t" "movl $7,%eax\n\t" @@ -2605,6 +2653,9 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "\n.L__wine_syscall_dispatcher_no_fxsave:\n\t" "fnsave 0x40(%ecx)\n\t" "fwait\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_nofpu_save:\n\t" + "orl $0x10,(%ecx)\n\t" /* frame->syscall_flags |= SYSCALL_NEED_XSTATE */ "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" "movl %ecx,%esp\n\t" "movl 0x1c(%esp),%edx\n\t" /* frame->eax */ diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index eeba97d5de9..d123e381882 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -364,6 +364,7 @@ C_ASSERT( sizeof(struct stack_layout) == 0x590 ); /* Should match the size in ca #define SYSCALL_HAVE_XSAVEC 2 #define SYSCALL_HAVE_PTHREAD_TEB 4 #define SYSCALL_HAVE_WRFSGSBASE 8 +#define SYSCALL_NEED_XSTATE 0x10
static unsigned int syscall_flags;
@@ -823,6 +824,28 @@ static inline void leave_handler( const ucontext_t *sigcontext ) }
+/*********************************************************************** + * save_context_xstate + * + * Set the fltsave and xstate values from a sigcontext. + */ +static void save_context_xstate( struct xcontext *xcontext, const ucontext_t *sigcontext ) +{ + CONTEXT *context = &xcontext->c; + + if (FPU_sig(sigcontext)) + { + XSTATE *xs; + + context->ContextFlags |= CONTEXT_FLOATING_POINT; + context->u.FltSave = *FPU_sig(sigcontext); + context->MxCsr = context->u.FltSave.MxCsr; + if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + *xstate_from_context( context ) = *xs; + } +} + + /*********************************************************************** * save_context * @@ -1113,8 +1136,11 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) } if (needed_flags & CONTEXT_FLOATING_POINT) { - if (!xstate_compaction_enabled || - (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + /* nothing to do, already copied from sigcontext */ + } + else if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_FLOATING_POINT)) { memcpy( &context->u.FltSave, &frame->xsave, FIELD_OFFSET( XSAVE_FORMAT, MxCsr )); memcpy( context->u.FltSave.FloatRegisters, frame->xsave.FloatRegisters, @@ -1128,7 +1154,12 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->u.FltSave.ControlWord = 0x37f; }
- if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) + if (frame->syscall_flags & SYSCALL_NEED_XSTATE) + { + memcpy( context->u.FltSave.XmmRegisters + 6, frame->xsave.XmmRegisters + 6, + sizeof( frame->xsave.XmmRegisters ) - 6 * sizeof(*frame->xsave.XmmRegisters) ); + } + else if (!xstate_compaction_enabled || (frame->xstate.Mask & XSTATE_MASK_LEGACY_SSE)) { memcpy( context->u.FltSave.XmmRegisters, frame->xsave.XmmRegisters, sizeof( context->u.FltSave.XmmRegisters )); @@ -1146,7 +1177,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->MxCsr = context->u.FltSave.MxCsr; context->ContextFlags |= CONTEXT_FLOATING_POINT; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && + !(frame->syscall_flags & SYSCALL_NEED_XSTATE)) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1609,6 +1641,7 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "movq 0xa8(%r10),%rax\n\t" /* prev_frame->syscall_table */ "movq %rax,0xa8(%rsp)\n\t" /* frame->syscall_table */ "movl 0xb0(%r10),%r14d\n\t" /* prev_frame->syscall_flags */ + "andl $~0x10,%r14d\n\t" /* syscall_flags &= ~SYSCALL_NEED_XSTATE */ "movl %r14d,0xb0(%rsp)\n\t" /* frame->syscall_flags */ "movq %r10,0xa0(%rsp)\n\t" /* frame->prev_frame */ "movq %rsp,0x328(%r11)\n\t" /* amd64_thread_data()->syscall_frame */ @@ -2152,6 +2185,7 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *ucontext ) DECLSPEC_ALIGN(64) XSTATE xs; context.c.ContextFlags = CONTEXT_FULL; context_init_xstate( &context.c, &xs ); + save_context_xstate( &context, ucontext );
NtGetContextThread( GetCurrentThread(), &context.c ); wait_suspend( &context.c ); @@ -2648,6 +2682,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, * depends on us returning to it. Adjust the return address accordingly. */ "subq $0xb,0x70(%rcx)\n\t" "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ + "testl $0x80000000,%eax\n\t" + "jnz .L__wine_syscall_dispatcher_nofpu_save\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz .L__wine_syscall_dispatcher_no_xsave\n\t" "movl $7,%eax\n\t" @@ -2669,6 +2705,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" "\n.L__wine_syscall_dispatcher_no_xsave:\n\t" "fxsave64 0xc0(%rcx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" + "\n.L__wine_syscall_dispatcher_nofpu_save:\n\t" + "orq $0x10,%r14\n\t" + "movq %r14,0xb0(%rcx)\n\t" /* frame->syscall_flags |= SYSCALL_NEED_XSTATE */ + "movdqa %xmm6,0x1c0(%rcx)\n\t" + "movdqa %xmm7,0x1d0(%rcx)\n\t" + "movdqa %xmm8,0x1e0(%rcx)\n\t" + "movdqa %xmm9,0x1f0(%rcx)\n\t" + "movdqa %xmm10,0x200(%rcx)\n\t" + "movdqa %xmm11,0x210(%rcx)\n\t" + "movdqa %xmm12,0x220(%rcx)\n\t" + "movdqa %xmm13,0x230(%rcx)\n\t" + "movdqa %xmm14,0x240(%rcx)\n\t" + "movdqa %xmm15,0x250(%rcx)\n\t" "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" /* remember state when $rcx is pointing to "frame" */ __ASM_CFI(".cfi_remember_state\n\t") @@ -2739,7 +2789,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "1:\n\t" #endif "testl $0x48,%edx\n\t" /* CONTEXT_FLOATING_POINT | CONTEXT_XSTATE */ - "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" + "jz .L__wine_syscall_dispatcher_nofpu_restore\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz .L__wine_syscall_dispatcher_no_xrstor\n\t" "movq %rax,%r11\n\t" @@ -2751,6 +2801,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" "\n.L__wine_syscall_dispatcher_no_xrstor:\n\t" "fxrstor64 0xc0(%rcx)\n\t" + "jmp .L__wine_syscall_dispatcher_fpu_restored\n\t" + "\n.L__wine_syscall_dispatcher_nofpu_restore:\n\t" + "testl $0x10,%r14d\n\t" /* SYSCALL_NEED_XSTATE */ + "jz .L__wine_syscall_dispatcher_fpu_restored\n\t" + "movdqa 0x1c0(%rcx),%xmm6\n\t" + "movdqa 0x1d0(%rcx),%xmm7\n\t" + "movdqa 0x1e0(%rcx),%xmm8\n\t" + "movdqa 0x1f0(%rcx),%xmm9\n\t" + "movdqa 0x200(%rcx),%xmm10\n\t" + "movdqa 0x210(%rcx),%xmm11\n\t" + "movdqa 0x220(%rcx),%xmm12\n\t" + "movdqa 0x230(%rcx),%xmm13\n\t" + "movdqa 0x240(%rcx),%xmm14\n\t" + "movdqa 0x250(%rcx),%xmm15\n\t" "\n.L__wine_syscall_dispatcher_fpu_restored:\n\t" "movq 0x98(%rcx),%rbp\n\t" __ASM_CFI(".cfi_same_value rbp\n\t")
From: Rémi Bernon rbernon@codeweavers.com
This improves performance of unix calls a little by removing one level of indirection and a branch. --- dlls/win32u/Makefile.in | 2 +- dlls/winecrt0/unix_lib.c | 45 ++++++++++++++++++++-------------------- include/wine/unixlib.h | 5 +++++ 3 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/dlls/win32u/Makefile.in b/dlls/win32u/Makefile.in index 692b2d5c9f2..ba6915d91d8 100644 --- a/dlls/win32u/Makefile.in +++ b/dlls/win32u/Makefile.in @@ -2,7 +2,7 @@ EXTRADEFS = -DWINE_NO_LONG_TYPES MODULE = win32u.dll UNIXLIB = win32u.so IMPORTLIB = win32u -IMPORTS = ntdll winecrt0 +IMPORTS = winecrt0 ntdll UNIX_CFLAGS = $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) UNIX_LIBS = $(CARBON_LIBS) $(APPKIT_LIBS) $(PTHREAD_LIBS) -lm
diff --git a/dlls/winecrt0/unix_lib.c b/dlls/winecrt0/unix_lib.c index 816be88edcc..9f7518ee314 100644 --- a/dlls/winecrt0/unix_lib.c +++ b/dlls/winecrt0/unix_lib.c @@ -29,40 +29,41 @@ #include "winternl.h" #include "wine/unixlib.h"
-static NTSTATUS (WINAPI *p__wine_unix_call)( unixlib_handle_t, unsigned int, void * ); -static NTSTATUS (WINAPI *p__wine_unix_call_fast)( unixlib_handle_t, unsigned int, void * ); - -static void load_func( void **func, const char *name, void *def ) +static void *load_func( const char *name ) { - if (!*func) - { - HMODULE module = GetModuleHandleW( L"ntdll.dll" ); - void *proc = GetProcAddress( module, name ); - InterlockedExchangePointer( func, proc ? proc : def ); - } -} -#define LOAD_FUNC(name) load_func( (void **)&p ## name, #name, fallback ## name ) + UNICODE_STRING ntdll_str; + ANSI_STRING name_str; + HMODULE module; + void *proc;
-static NTSTATUS __cdecl fallback__wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) -{ - return STATUS_DLL_NOT_FOUND; + RtlInitUnicodeString( &ntdll_str, L"ntdll.dll" ); + RtlInitAnsiString( &name_str, name ); + + if (LdrGetDllHandleEx( LDR_GET_DLL_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, NULL, &ntdll_str, &module )) return NULL; + if (LdrGetProcedureAddress( module, &name_str, 0, &proc )) return NULL; + return proc; }
-static NTSTATUS __cdecl fallback__wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) +static NTSTATUS WINAPI fallback___wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) { return STATUS_DLL_NOT_FOUND; }
-NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) +static NTSTATUS WINAPI delayload___wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) { - LOAD_FUNC( __wine_unix_call ); - return p__wine_unix_call( handle, code, args ); + void *proc = load_func( "__wine_unix_call" ); + InterlockedExchangePointer( (void *)&__wine_unix_call, proc ? proc : (void *)fallback___wine_unix_call ); + return __wine_unix_call( handle, code, args ); }
-NTSTATUS WINAPI __wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) +static NTSTATUS WINAPI delayload___wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ) { - LOAD_FUNC( __wine_unix_call_fast ); - return p__wine_unix_call_fast( handle, code, args ); + void *proc = load_func( "__wine_unix_call_fast" ); + InterlockedExchangePointer( (void *)&__wine_unix_call_fast, proc ? proc : (void *)fallback___wine_unix_call ); + return __wine_unix_call_fast( handle, code, args ); }
+NTSTATUS (WINAPI *__wine_unix_call)( unixlib_handle_t, unsigned int, void * ) = delayload___wine_unix_call; +NTSTATUS (WINAPI *__wine_unix_call_fast)( unixlib_handle_t, unsigned int, void * ) = delayload___wine_unix_call_fast; + #endif /* __WINE_PE_BUILD */ diff --git a/include/wine/unixlib.h b/include/wine/unixlib.h index 581790516a5..86372ad25d3 100644 --- a/include/wine/unixlib.h +++ b/include/wine/unixlib.h @@ -24,8 +24,13 @@ typedef NTSTATUS (*unixlib_entry_t)( void *args ); typedef UINT64 unixlib_handle_t;
+#if defined(WINE_UNIX_LIB) || defined(_NTSYSTEM_) extern NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ); extern NTSTATUS WINAPI __wine_unix_call_fast( unixlib_handle_t handle, unsigned int code, void *args ); +#else +extern NTSTATUS (WINAPI *__wine_unix_call)( unixlib_handle_t handle, unsigned int code, void *args ); +extern NTSTATUS (WINAPI *__wine_unix_call_fast)( unixlib_handle_t handle, unsigned int code, void *args ); +#endif
#ifdef WINE_UNIX_LIB
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 13 +++++++++++++ dlls/ntdll/unix/signal_x86_64.c | 9 +++++++++ tools/winebuild/import.c | 1 + 3 files changed, 23 insertions(+)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 262a0b8072f..60bd27712d8 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2656,6 +2656,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "jmp .L__wine_syscall_dispatcher_fpu_saved\n\t" "\n.L__wine_syscall_dispatcher_nofpu_save:\n\t" "orl $0x10,(%ecx)\n\t" /* frame->syscall_flags |= SYSCALL_NEED_XSTATE */ + "testl $0x40000000,%eax\n\t" /* __wine_unix_call_fast */ + "jnz .L__wine_syscall_dispatcher_unix_call_fast\n\t" "\n.L__wine_syscall_dispatcher_fpu_saved:\n\t" "movl %ecx,%esp\n\t" "movl 0x1c(%esp),%edx\n\t" /* frame->eax */ @@ -2673,6 +2675,17 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "rep; movsl\n\t" "call *(%eax,%edx,4)\n\t" "leal -0x34(%ebp),%esp\n\t" + "jmp .L__wine_syscall_dispatcher_restore\n\t" + "\n.L__wine_syscall_dispatcher_unix_call_fast:\n\t" + "movl %ecx,%esp\n\t" + "subl $4,%esp\n\t" + "andl $~15,%esp\n\t" + "movl 0xc(%esi),%eax\n\t" + "movl %eax,(%esp)\n\t" + "movl 0x8(%esi),%eax\n\t" + "movl (%esi),%edx\n\t" + "call *(%edx,%eax,4)\n\t" + "leal -0x34(%ebp),%esp\n" "\n.L__wine_syscall_dispatcher_restore:\n\t" "" __ASM_CFI_CFA_IS_AT1(esp, 0x0c) diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index d123e381882..775949092b6 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2753,6 +2753,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq %rcx,%rsp\n\t" "movq 0x00(%rcx),%rax\n\t" "movq 0x18(%rcx),%rdx\n\t" + "testl $0x40000000,%eax\n\t" /* __wine_unix_call_fast */ + "jnz .L__wine_syscall_dispatcher_unix_call_fast\n\t" "movl %eax,%ebx\n\t" "shrl $8,%ebx\n\t" "andl $0x30,%ebx\n\t" /* syscall table number */ @@ -2777,6 +2779,13 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq (%rbx),%r10\n\t" /* table->ServiceTable */ "callq *(%r10,%rax,8)\n\t" "leaq -0x98(%rbp),%rcx\n\t" + "jmp .L__wine_syscall_dispatcher_restore\n" + "\n.L__wine_syscall_dispatcher_unix_call_fast:\n\t" + "movq %r8,%rdi\n\t" + "subq $0x20,%rsp\n\t" + "andq $~15,%rsp\n\t" + "callq *(%r10,%rdx,8)\n\t" + "leaq -0x98(%rbp),%rcx\n\t" /* $rcx is now pointing to "frame" again */ __ASM_CFI(".cfi_restore_state\n\t") "\n.L__wine_syscall_dispatcher_restore:\n\t" diff --git a/tools/winebuild/import.c b/tools/winebuild/import.c index 18ca6eaef01..b504834063b 100644 --- a/tools/winebuild/import.c +++ b/tools/winebuild/import.c @@ -1386,6 +1386,7 @@ void output_syscalls( DLLSPEC *spec ) const char *name = get_link_name(odp); unsigned int id = (spec->syscall_table << 12) + i; if (odp->flags & FLAG_NOFPU) id |= (1u << 31); + if (strstr( name, "__wine_unix_call_fast" )) id |= (1u << 30);
output( "\t.align %d\n", get_alignment(16) ); output( "\t%s\n", func_declaration(name) );
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/ntdll/ntdll.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec index 750c214ec2a..287759ca108 100644 --- a/dlls/ntdll/ntdll.spec +++ b/dlls/ntdll/ntdll.spec @@ -317,7 +317,7 @@ @ stdcall -syscall NtQueryMutant(long long ptr long ptr) @ stdcall -syscall NtQueryObject(long long ptr long ptr) # @ stub NtQueryOpenSubKeys -@ stdcall -syscall NtQueryPerformanceCounter(ptr ptr) +@ stdcall -syscall -nofpu NtQueryPerformanceCounter(ptr ptr) # @ stub NtQueryPortInformationProcess # @ stub NtQueryQuotaInformationFile @ stdcall -syscall NtQuerySection(long long ptr long ptr)
From: Rémi Bernon rbernon@codeweavers.com
This assumes that DF is cleared by unix call, which should be guaranteed by the SYSV ABI, and only needs to be restored if it was set on entry. --- dlls/ntdll/unix/signal_x86_64.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 775949092b6..9e867d5adf7 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2845,8 +2845,11 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "jnz .L__wine_syscall_dispatcher_control_restore\n\t" __ASM_CFI(".cfi_remember_state\n\t") "movq 0x80(%rcx),%r11\n\t" /* frame->eflags */ + "testl $0x204400,%r11d\n\t" /* DF NT ID are preserved */ + "jz .L__wine_syscall_dispatcher_skip_rflags\n\t" "pushq %r11\n\t" "popfq\n\t" + "\n.L__wine_syscall_dispatcher_skip_rflags:\n\t" "movq 0x88(%rcx),%rsp\n\t" __ASM_CFI(".cfi_def_cfa rsp, 0\n\t") __ASM_CFI(".cfi_same_value rsp\n\t")
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=126252
Your paranoid android.
=== debian11 (32 bit report) ===
ntdll: exception.c:10344: Test failed: Got unexpected Mask 0. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 0. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 1. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 2. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 3.
=== debian11 (32 bit zh:CN report) ===
ntdll: exception.c:10344: Test failed: Got unexpected Mask 0. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 0. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 1. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 2. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 3.
=== debian11b (64 bit WoW report) ===
ntdll: exception.c:10344: Test failed: Got unexpected Mask 0. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 0. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 1. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 2. exception.c:10347: Test failed: Got unexpected value 0xcccccccc, i 3.