From: Rémi Bernon rbernon@codeweavers.com
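This slightly improves the performance of unix calls by removing one level of indirection and a branch: __wine_unix_call becomes a function pointer that initially points to a resolver stub, and the first call replaces it with the ntdll export (or with a fallback that returns STATUS_DLL_NOT_FOUND), so later calls no longer go through a wrapper that checks whether the pointer has been loaded. --- dlls/win32u/Makefile.in | 2 +- dlls/winecrt0/unix_lib.c | 34 +++++++++++++++++----------------- include/wine/unixlib.h | 5 +++-- 3 files changed, 21 insertions(+), 20 deletions(-)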
diff --git a/dlls/win32u/Makefile.in b/dlls/win32u/Makefile.in index 692b2d5c9f2..ba6915d91d8 100644 --- a/dlls/win32u/Makefile.in +++ b/dlls/win32u/Makefile.in @@ -2,7 +2,7 @@ EXTRADEFS = -DWINE_NO_LONG_TYPES MODULE = win32u.dll UNIXLIB = win32u.so IMPORTLIB = win32u -IMPORTS = ntdll winecrt0 +IMPORTS = winecrt0 ntdll UNIX_CFLAGS = $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) UNIX_LIBS = $(CARBON_LIBS) $(APPKIT_LIBS) $(PTHREAD_LIBS) -lm
diff --git a/dlls/winecrt0/unix_lib.c b/dlls/winecrt0/unix_lib.c index c86897b9905..1926fb8d36b 100644 --- a/dlls/winecrt0/unix_lib.c +++ b/dlls/winecrt0/unix_lib.c @@ -29,28 +29,28 @@ #include "winternl.h" #include "wine/unixlib.h"
-static NTSTATUS (WINAPI *p__wine_unix_call)( unixlib_handle_t, unsigned int, void * ); - -static void load_func( void **func, const char *name, void *def ) -{ - if (!*func) - { - HMODULE module = GetModuleHandleW( L"ntdll.dll" ); - void *proc = GetProcAddress( module, name ); - InterlockedExchangePointer( func, proc ? proc : def ); - } -} -#define LOAD_FUNC(name) load_func( (void **)&p ## name, #name, fallback ## name ) - -static NTSTATUS __cdecl fallback__wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) +static NTSTATUS WINAPI fallback__wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) { return STATUS_DLL_NOT_FOUND; }
-NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) +static NTSTATUS WINAPI delayload__wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args ) { - LOAD_FUNC( __wine_unix_call ); - return p__wine_unix_call( handle, code, args ); + UNICODE_STRING ntdll_str; + ANSI_STRING name_str; + HMODULE module; + void *proc; + + RtlInitUnicodeString( &ntdll_str, L"ntdll.dll" ); + RtlInitAnsiString( &name_str, "__wine_unix_call" ); + + if (LdrGetDllHandleEx( LDR_GET_DLL_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, NULL, &ntdll_str, &module )) proc = NULL; + else if (LdrGetProcedureAddress( module, &name_str, 0, &proc )) proc = NULL; + InterlockedExchangePointer( (void *)&__wine_unix_call, proc ? proc : (void *)fallback__wine_unix_call ); + + return __wine_unix_call( handle, code, args ); }
+NTSTATUS (WINAPI *__wine_unix_call)( unixlib_handle_t, unsigned int, void * ) = delayload__wine_unix_call; + #endif /* __WINE_PE_BUILD */ diff --git a/include/wine/unixlib.h b/include/wine/unixlib.h index ef60b32184c..bde7bfc3e72 100644 --- a/include/wine/unixlib.h +++ b/include/wine/unixlib.h @@ -24,10 +24,11 @@ typedef NTSTATUS (*unixlib_entry_t)( void *args ); typedef UINT64 unixlib_handle_t;
+#ifndef WINE_UNIX_LIB +extern NTSTATUS (WINAPI *__wine_unix_call)( unixlib_handle_t handle, unsigned int code, void *args ); +#else extern NTSTATUS WINAPI __wine_unix_call( unixlib_handle_t handle, unsigned int code, void *args );
-#ifdef WINE_UNIX_LIB - /* some useful helpers from ntdll */ extern const char *ntdll_get_build_dir(void); extern const char *ntdll_get_data_dir(void);
From: Rémi Bernon rbernon@codeweavers.com
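Make winebuild give the __wine_unix_call thunk the magic syscall id 0xfa57ca11, and make the i386 and x86_64 syscall dispatchers branch on that id to a dedicated path that bypasses the XSAVE/FXSAVE state save (nothing is saved on i386; only MXCSR, the x87 control word and xmm6-xmm15 on x86_64) before making the call. --- dlls/ntdll/unix/signal_i386.c | 15 ++++++++++++++- dlls/ntdll/unix/signal_x86_64.c | 26 ++++++++++++++++++++++++-- tools/winebuild/import.c | 4 ++++ 3 files changed, 42 insertions(+), 3 deletions(-)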
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index efb7cc497ef..60a4696ef48 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2582,6 +2582,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "shrl $8,%ebx\n\t" "andl $0x30,%ebx\n\t" /* syscall table number */ "addl 0x38(%ecx),%ebx\n\t" /* frame->syscall_table */ + "cmpl $0xfa57ca11,0x1c(%ecx)\n\t" + "je 5f\n\t" "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ "jz 2f\n\t" "movl $7,%eax\n\t" @@ -2613,7 +2615,18 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "fxsave 0x40(%ecx)\n\t" "jmp 4f\n" "3:\tfnsave 0x40(%ecx)\n\t" - "fwait\n" + "fwait\n\t" + "jmp 4f\n" + "5:\tmovl %ecx,%esp\n\t" + "subl $4,%esp\n\t" + "andl $~15,%esp\n\t" + "movl 0xc(%esi),%eax\n\t" + "movl %eax,(%esp)\n\t" + "movl 0x8(%esi),%eax\n\t" + "movl (%esi),%edx\n\t" + "call *(%edx,%eax,4)\n\t" + "leal -0x34(%ebp),%esp\n" + "jmp 5f\n" "4:\tmovl %ecx,%esp\n\t" "movl 0x1c(%esp),%edx\n\t" /* frame->eax */ "andl $0xfff,%edx\n\t" /* syscall number */ diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 263bb64c8cc..7150779e24b 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2665,6 +2665,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, * depends on us returning to it. Adjust the return address accordingly. 
*/ "subq $0xb,0x70(%rcx)\n\t" "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ + "cmpl $0xfa57ca11,%eax\n\t" + "je 4f\n\t" "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 2f\n\t" "movl $7,%eax\n\t" @@ -2683,7 +2685,20 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "jmp 3f\n" "1:\txsave64 0xc0(%rcx)\n\t" "jmp 3f\n" - "2:\tfxsave64 0xc0(%rcx)\n" + "2:\tfxsave64 0xc0(%rcx)\n\t" + "jmp 3f\n" + "4:\tstmxcsr 0xd8(%rcx)\n\t" + "fnstcw 0xc0(%rcx)\n\t" + "movdqa %xmm6,0x1c0(%rcx)\n\t" + "movdqa %xmm7,0x1d0(%rcx)\n\t" + "movdqa %xmm8,0x1e0(%rcx)\n\t" + "movdqa %xmm9,0x1f0(%rcx)\n\t" + "movdqa %xmm10,0x200(%rcx)\n\t" + "movdqa %xmm11,0x210(%rcx)\n\t" + "movdqa %xmm12,0x220(%rcx)\n\t" + "movdqa %xmm13,0x230(%rcx)\n\t" + "movdqa %xmm14,0x240(%rcx)\n\t" + "movdqa %xmm15,0x250(%rcx)\n" /* remember state when $rcx is pointing to "frame" */ __ASM_CFI(".cfi_remember_state\n\t") "3:\tleaq 0x98(%rcx),%rbp\n\t" @@ -2716,7 +2731,14 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movq %rcx,%rsp\n\t" "movq 0x00(%rcx),%rax\n\t" "movq 0x18(%rcx),%rdx\n\t" - "movl %eax,%ebx\n\t" + "cmpl $0xfa57ca11,%eax\n\t" + "jne 1f\n\t" + "movq %r8,%rdi\n\t" + "subq $0x20,%rsp\n\t" + "callq *(%r10,%rdx,8)\n\t" + "leaq -0x98(%rbp),%rcx\n\t" + "jmp 2f\n" + "1:\tmovl %eax,%ebx\n\t" "shrl $8,%ebx\n\t" "andl $0x30,%ebx\n\t" /* syscall table number */ "movq 0xa8(%rcx),%rcx\n\t" /* frame->syscall_table */ diff --git a/tools/winebuild/import.c b/tools/winebuild/import.c index c934e456cfe..436df547f49 100644 --- a/tools/winebuild/import.c +++ b/tools/winebuild/import.c @@ -1403,6 +1403,8 @@ void output_syscalls( DLLSPEC *spec ) switch (target.cpu) { case CPU_i386: + if (!strcmp(name, "__wine_unix_call@16")) id = 0xfa57ca11; + if (UsePIC) { output( "\tcall %s\n", asm_name("__wine_spec_get_pc_thunk_eax") ); @@ -1419,6 +1421,8 @@ void output_syscalls( DLLSPEC *spec ) output( "\tret $%u\n", odp->type == TYPE_STDCALL ? 
get_args_size( odp ) : 0 ); break; case CPU_x86_64: + if (!strcmp(name, "__wine_unix_call")) id = 0xfa57ca11; + /* Chromium depends on syscall thunks having the same form as on * Windows. For 64-bit systems the only viable form we can emulate is * having an int $0x2e fallback. Since actually using an interrupt is