Fixes crash on start in Nier Automata after switching kernel32 to PE.
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- The game calls GetProcAddress from its (probably DRM-related) code with an unaligned stack. Previously, in the ELF build, all the stdcall functions had their stack force-aligned by gcc, but the mingw build doesn't do that.
dlls/kernel32/module.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/dlls/kernel32/module.c b/dlls/kernel32/module.c index 12100c1fe23..9b3a3952307 100644 --- a/dlls/kernel32/module.c +++ b/dlls/kernel32/module.c @@ -320,18 +320,18 @@ __ASM_GLOBAL_FUNC( get_proc_address_wrapper, "movq %rsp,%rbp\n\t" __ASM_SEH(".seh_setframe %rbp,0\n\t") __ASM_CFI(".cfi_def_cfa_register %rbp\n\t") - "subq $0x40,%rsp\n\t" - __ASM_SEH(".seh_stackalloc 0x40\n\t") __ASM_SEH(".seh_endprologue\n\t") - "movaps %xmm0,-0x10(%rbp)\n\t" - "movaps %xmm1,-0x20(%rbp)\n\t" - "movaps %xmm2,-0x30(%rbp)\n\t" - "movaps %xmm3,-0x40(%rbp)\n\t" + "subq $0x60,%rsp\n\t" + "andq $~15,%rsp\n\t" + "movaps %xmm0,0x20(%rsp)\n\t" + "movaps %xmm1,0x30(%rsp)\n\t" + "movaps %xmm2,0x40(%rsp)\n\t" + "movaps %xmm3,0x50(%rsp)\n\t" "call " __ASM_NAME("get_proc_address") "\n\t" - "movaps -0x40(%rbp), %xmm3\n\t" - "movaps -0x30(%rbp), %xmm2\n\t" - "movaps -0x20(%rbp), %xmm1\n\t" - "movaps -0x10(%rbp), %xmm0\n\t" + "movaps 0x50(%rsp), %xmm3\n\t" + "movaps 0x40(%rsp), %xmm2\n\t" + "movaps 0x30(%rsp), %xmm1\n\t" + "movaps 0x20(%rsp), %xmm0\n\t" "leaq 0(%rbp),%rsp\n\t" __ASM_CFI(".cfi_def_cfa_register %rsp\n\t") "popq %rbp\n\t"
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/kernelbase/loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/kernelbase/loader.c b/dlls/kernelbase/loader.c index 4b615e4a08f..4fb4293ca2b 100644 --- a/dlls/kernelbase/loader.c +++ b/dlls/kernelbase/loader.c @@ -439,7 +439,7 @@ BOOL WINAPI DECLSPEC_HOTPATCH GetModuleHandleExW( DWORD flags, LPCWSTR name, HMO * This wrapper saves xmm0 - 3 to the stack. */ __ASM_GLOBAL_FUNC( GetProcAddress, - ".byte 0x48\n\t" /* hotpatch prolog */ + ".byte 0x48,0x8d,0xa4,0x24,0x00,0x00,0x00,0x00\n\t" /* hotpatch prolog */ "pushq %rbp\n\t" __ASM_SEH(".seh_pushreg %rbp\n\t") __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
Paul Gofman pgofman@codeweavers.com writes:
Signed-off-by: Paul Gofman pgofman@codeweavers.com
dlls/kernelbase/loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/kernelbase/loader.c b/dlls/kernelbase/loader.c index 4b615e4a08f..4fb4293ca2b 100644 --- a/dlls/kernelbase/loader.c +++ b/dlls/kernelbase/loader.c @@ -439,7 +439,7 @@ BOOL WINAPI DECLSPEC_HOTPATCH GetModuleHandleExW( DWORD flags, LPCWSTR name, HMO
 * This wrapper saves xmm0 - 3 to the stack.
 */ __ASM_GLOBAL_FUNC( GetProcAddress,
- ".byte 0x48\n\t" /* hotpatch prolog */
+ ".byte 0x48,0x8d,0xa4,0x24,0x00,0x00,0x00,0x00\n\t" /* hotpatch prolog */
Is there anything that depends on such a prolog? And is there any evidence that Windows is using this? AFAIU making the first instruction two bytes should be sufficient.
On 10/27/20 22:37, Alexandre Julliard wrote:
Paul Gofman pgofman@codeweavers.com writes:
Signed-off-by: Paul Gofman pgofman@codeweavers.com
dlls/kernelbase/loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/kernelbase/loader.c b/dlls/kernelbase/loader.c index 4b615e4a08f..4fb4293ca2b 100644 --- a/dlls/kernelbase/loader.c +++ b/dlls/kernelbase/loader.c @@ -439,7 +439,7 @@ BOOL WINAPI DECLSPEC_HOTPATCH GetModuleHandleExW( DWORD flags, LPCWSTR name, HMO
 * This wrapper saves xmm0 - 3 to the stack.
 */ __ASM_GLOBAL_FUNC( GetProcAddress,
- ".byte 0x48\n\t" /* hotpatch prolog */
+ ".byte 0x48,0x8d,0xa4,0x24,0x00,0x00,0x00,0x00\n\t" /* hotpatch prolog */
Is there anything that depends on such a prolog? And is there any evidence that Windows is using this? AFAIU making the first instruction two bytes should be sufficient.
I don't think I ever noticed anything requiring a hotpatch prologue here, nor have I seen any evidence of whether Windows has a hotpatch prologue here or not. I guess a dangling REX prefix can confuse some patchers though; at least that was the case when our import thunks had one. From what I saw debugging some hotpatchers on x64, they are overall less demanding than on i386 for non-syscall thunk functions and try to guess the size of the first opcode (while not necessarily understanding every possible opcode, so they may mess up).
So I guess we could just as well remove the ".byte 0x48" instead of adding the "official" hotpatch prologue, unless we get evidence that something else is needed.
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/kernelbase/loader.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/dlls/kernelbase/loader.c b/dlls/kernelbase/loader.c index 4fb4293ca2b..1617ac11153 100644 --- a/dlls/kernelbase/loader.c +++ b/dlls/kernelbase/loader.c @@ -447,18 +447,18 @@ __ASM_GLOBAL_FUNC( GetProcAddress, "movq %rsp,%rbp\n\t" __ASM_SEH(".seh_setframe %rbp,0\n\t") __ASM_CFI(".cfi_def_cfa_register %rbp\n\t") - "subq $0x60,%rsp\n\t" - __ASM_SEH(".seh_stackalloc 0x60\n\t") __ASM_SEH(".seh_endprologue\n\t") - "movaps %xmm0,-0x10(%rbp)\n\t" - "movaps %xmm1,-0x20(%rbp)\n\t" - "movaps %xmm2,-0x30(%rbp)\n\t" - "movaps %xmm3,-0x40(%rbp)\n\t" + "subq $0x60,%rsp\n\t" + "andq $~15,%rsp\n\t" + "movaps %xmm0,0x20(%rsp)\n\t" + "movaps %xmm1,0x30(%rsp)\n\t" + "movaps %xmm2,0x40(%rsp)\n\t" + "movaps %xmm3,0x50(%rsp)\n\t" "call " __ASM_NAME("get_proc_address") "\n\t" - "movaps -0x40(%rbp), %xmm3\n\t" - "movaps -0x30(%rbp), %xmm2\n\t" - "movaps -0x20(%rbp), %xmm1\n\t" - "movaps -0x10(%rbp), %xmm0\n\t" + "movaps 0x50(%rsp), %xmm3\n\t" + "movaps 0x40(%rsp), %xmm2\n\t" + "movaps 0x30(%rsp), %xmm1\n\t" + "movaps 0x20(%rsp), %xmm0\n\t" "leaq 0(%rbp),%rsp\n\t" __ASM_CFI(".cfi_def_cfa_register %rsp\n\t") "popq %rbp\n\t"