There is no guarantee that jmp_buf is 16 bytes aligned.
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/winecrt0/exception.c | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/dlls/winecrt0/exception.c b/dlls/winecrt0/exception.c index 137ee2d6ee3..d3ce8d1aebb 100644 --- a/dlls/winecrt0/exception.c +++ b/dlls/winecrt0/exception.c @@ -80,16 +80,16 @@ __ASM_GLOBAL_FUNC( __wine_setjmpex, "movq %r15,0x48(%rcx)\n\t" /* jmp_buf->R15 */ "movq (%rsp),%rax\n\t" "movq %rax,0x50(%rcx)\n\t" /* jmp_buf->Rip */ - "movdqa %xmm6,0x60(%rcx)\n\t" /* jmp_buf->Xmm6 */ - "movdqa %xmm7,0x70(%rcx)\n\t" /* jmp_buf->Xmm7 */ - "movdqa %xmm8,0x80(%rcx)\n\t" /* jmp_buf->Xmm8 */ - "movdqa %xmm9,0x90(%rcx)\n\t" /* jmp_buf->Xmm9 */ - "movdqa %xmm10,0xa0(%rcx)\n\t" /* jmp_buf->Xmm10 */ - "movdqa %xmm11,0xb0(%rcx)\n\t" /* jmp_buf->Xmm11 */ - "movdqa %xmm12,0xc0(%rcx)\n\t" /* jmp_buf->Xmm12 */ - "movdqa %xmm13,0xd0(%rcx)\n\t" /* jmp_buf->Xmm13 */ - "movdqa %xmm14,0xe0(%rcx)\n\t" /* jmp_buf->Xmm14 */ - "movdqa %xmm15,0xf0(%rcx)\n\t" /* jmp_buf->Xmm15 */ + "movq %xmm6,0x60(%rcx)\n\t" /* jmp_buf->Xmm6 */ + "movq %xmm7,0x70(%rcx)\n\t" /* jmp_buf->Xmm7 */ + "movq %xmm8,0x80(%rcx)\n\t" /* jmp_buf->Xmm8 */ + "movq %xmm9,0x90(%rcx)\n\t" /* jmp_buf->Xmm9 */ + "movq %xmm10,0xa0(%rcx)\n\t" /* jmp_buf->Xmm10 */ + "movq %xmm11,0xb0(%rcx)\n\t" /* jmp_buf->Xmm11 */ + "movq %xmm12,0xc0(%rcx)\n\t" /* jmp_buf->Xmm12 */ + "movq %xmm13,0xd0(%rcx)\n\t" /* jmp_buf->Xmm13 */ + "movq %xmm14,0xe0(%rcx)\n\t" /* jmp_buf->Xmm14 */ + "movq %xmm15,0xf0(%rcx)\n\t" /* jmp_buf->Xmm15 */ "xorq %rax,%rax\n\t" "retq" )
@@ -103,16 +103,16 @@ __ASM_GLOBAL_FUNC( __wine_longjmp, "movq 0x38(%rcx),%r13\n\t" /* jmp_buf->R13 */ "movq 0x40(%rcx),%r14\n\t" /* jmp_buf->R14 */ "movq 0x48(%rcx),%r15\n\t" /* jmp_buf->R15 */ - "movdqa 0x60(%rcx),%xmm6\n\t" /* jmp_buf->Xmm6 */ - "movdqa 0x70(%rcx),%xmm7\n\t" /* jmp_buf->Xmm7 */ - "movdqa 0x80(%rcx),%xmm8\n\t" /* jmp_buf->Xmm8 */ - "movdqa 0x90(%rcx),%xmm9\n\t" /* jmp_buf->Xmm9 */ - "movdqa 0xa0(%rcx),%xmm10\n\t" /* jmp_buf->Xmm10 */ - "movdqa 0xb0(%rcx),%xmm11\n\t" /* jmp_buf->Xmm11 */ - "movdqa 0xc0(%rcx),%xmm12\n\t" /* jmp_buf->Xmm12 */ - "movdqa 0xd0(%rcx),%xmm13\n\t" /* jmp_buf->Xmm13 */ - "movdqa 0xe0(%rcx),%xmm14\n\t" /* jmp_buf->Xmm14 */ - "movdqa 0xf0(%rcx),%xmm15\n\t" /* jmp_buf->Xmm15 */ + "movq 0x60(%rcx),%xmm6\n\t" /* jmp_buf->Xmm6 */ + "movq 0x70(%rcx),%xmm7\n\t" /* jmp_buf->Xmm7 */ + "movq 0x80(%rcx),%xmm8\n\t" /* jmp_buf->Xmm8 */ + "movq 0x90(%rcx),%xmm9\n\t" /* jmp_buf->Xmm9 */ + "movq 0xa0(%rcx),%xmm10\n\t" /* jmp_buf->Xmm10 */ + "movq 0xb0(%rcx),%xmm11\n\t" /* jmp_buf->Xmm11 */ + "movq 0xc0(%rcx),%xmm12\n\t" /* jmp_buf->Xmm12 */ + "movq 0xd0(%rcx),%xmm13\n\t" /* jmp_buf->Xmm13 */ + "movq 0xe0(%rcx),%xmm14\n\t" /* jmp_buf->Xmm14 */ + "movq 0xf0(%rcx),%xmm15\n\t" /* jmp_buf->Xmm15 */ "movq 0x50(%rcx),%rdx\n\t" /* jmp_buf->Rip */ "movq 0x10(%rcx),%rsp\n\t" /* jmp_buf->Rsp */ "jmp *%rdx" )
Paul Gofman pgofman@codeweavers.com writes:
There is no guarantee that jmp_buf is 16 bytes aligned.
It is using DECLSPEC_ALIGN(16) though. Where do you see it being misaligned?
On 7/13/20 23:05, Alexandre Julliard wrote:
Paul Gofman pgofman@codeweavers.com writes:
There is no guarantee that jmp_buf is 16 bytes aligned.
It is using DECLSPEC_ALIGN(16) though. Where do you see it being misaligned?
Well, I was getting that after turning NtOpenDirectoryObject locally into syscall thunk from __TRY / __CATCH block used by IsBadStringPtrW() from debugstr_w(). But after you pointed out that alignment is there which I initially missed, I found that the stack alignment which is present in syscall thunk generated code seems not to be performed (apparently, not intentionally) if the size of arguments is <= 0x20. I should rather be fixing that, sorry for the noise.
Paul Gofman pgofman@codeweavers.com writes:
On 7/13/20 23:05, Alexandre Julliard wrote:
Paul Gofman pgofman@codeweavers.com writes:
There is no guarantee that jmp_buf is 16 bytes aligned.
It is using DECLSPEC_ALIGN(16) though. Where do you see it being misaligned?
Well, I was getting that after turning NtOpenDirectoryObject locally into syscall thunk from __TRY / __CATCH block used by IsBadStringPtrW() from debugstr_w(). But after you pointed out that alignment is there which I initially missed, I found that the stack alignment which is present in syscall thunk generated code seems not to be performed (apparently, not intentionally) if the size of arguments is <= 0x20. I should rather be fixing that, sorry for the noise.
Fixing that wouldn't hurt, but note that all API functions should already be using force_align_arg_pointer. If that doesn't work correctly we'll have other problems.
On 7/14/20 10:35, Alexandre Julliard wrote:
Paul Gofman pgofman@codeweavers.com writes:
On 7/13/20 23:05, Alexandre Julliard wrote:
Paul Gofman pgofman@codeweavers.com writes:
There is no guarantee that jmp_buf is 16 bytes aligned.
It is using DECLSPEC_ALIGN(16) though. Where do you see it being misaligned?
Well, I was getting that after turning NtOpenDirectoryObject locally into syscall thunk from __TRY / __CATCH block used by IsBadStringPtrW() from debugstr_w(). But after you pointed out that alignment is there which I initially missed, I found that the stack alignment which is present in syscall thunk generated code seems not to be performed (apparently, not intentionally) if the size of arguments is <= 0x20. I should rather be fixing that, sorry for the noise.
Fixing that wouldn't hurt, but note that all API functions should already be using force_align_arg_pointer. If that doesn't work correctly we'll have other problems.
The stack is not aligned in Mingw PE modules for some reason (I checked with a winehq binary also that is not my local build problem only; I also checked by explicitly setting it for function to make sure it is not some configuration problem). It is aligned in .so though, my local problem here was due to making syscall thunks for still PE Nt functions. Yet failure to align stack in Staging syscall thunks back then was breaking builds on some systems even with .so ntdll [1].
On 14/07/2020 11:17, Paul Gofman wrote:
On 7/14/20 10:35, Alexandre Julliard wrote:
Paul Gofman pgofman@codeweavers.com writes:
On 7/13/20 23:05, Alexandre Julliard wrote:
Paul Gofman pgofman@codeweavers.com writes:
There is no guarantee that jmp_buf is 16 bytes aligned.
It is using DECLSPEC_ALIGN(16) though. Where do you see it being misaligned?
Well, I was getting that after turning NtOpenDirectoryObject locally into syscall thunk from __TRY / __CATCH block used by IsBadStringPtrW() from debugstr_w(). But after you pointed out that alignment is there which I initially missed, I found that the stack alignment which is present in syscall thunk generated code seems not to be performed (apparently, not intentionally) if the size of arguments is <= 0x20. I should rather be fixing that, sorry for the noise.
Fixing that wouldn't hurt, but note that all API functions should already be using force_align_arg_pointer. If that doesn't work correctly we'll have other problems.
The stack is not aligned in Mingw PE modules for some reason (I checked with a winehq binary also that is not my local build problem only; I also checked by explicitly setting it for function to make sure it is not some configuration problem).
You mean it's not forcefully aligned, right? If so, I think that's normal since the MS ABI mandates that it is 16 byte aligned. I don't think Windows force aligns the stack either, it just probably doesn't use aligned SSE instructions in the first place (and why it mostly doesn't crash, though I've heard stories of it crashing when people misalign the stack and break the ABI).
For this patch, just for future reference, you should be using movdqu, which anyway it's just as fast as movdqa unless the processor is very old. I don't know if it's still necessary though.
On 7/14/20 19:01, Gabriel Ivăncescu wrote:
You mean it's not forcefully aligned, right? If so, I think that's normal since the MS ABI mandates that it is 16 byte aligned.
I am not sure this is normal if you explicitly use __attribute__((force_align_arg_pointer)) on function, without even a warning with -Wall. Looks like probably gcc or mingw bug to me.
For this patch, just for future reference, you should be using movdqu, which anyway it's just as fast as movdqa unless the processor is very old. I don't know if it's still necessary though.
movdqa / movaps can (or could) work on unaligned addresses on some CPUs, but seems to fault on most. FWIW I generally prefer mov{a|u}ps for SSE regs, even if just to not mess them up with movq like I did.
July 13, 2020 2:43 PM, "Paul Gofman" pgofman@codeweavers.com wrote:
There is no guarantee that jmp_buf is 16 bytes aligned.
Signed-off-by: Paul Gofman pgofman@codeweavers.com
dlls/winecrt0/exception.c | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/dlls/winecrt0/exception.c b/dlls/winecrt0/exception.c index 137ee2d6ee3..d3ce8d1aebb 100644 --- a/dlls/winecrt0/exception.c +++ b/dlls/winecrt0/exception.c @@ -80,16 +80,16 @@ __ASM_GLOBAL_FUNC( __wine_setjmpex, "movq %r15,0x48(%rcx)\n\t" /* jmp_buf->R15 */ "movq (%rsp),%rax\n\t" "movq %rax,0x50(%rcx)\n\t" /* jmp_buf->Rip */
"movdqa %xmm6,0x60(%rcx)\n\t" /* jmp_buf->Xmm6 */
"movdqa %xmm7,0x70(%rcx)\n\t" /* jmp_buf->Xmm7 */
"movdqa %xmm8,0x80(%rcx)\n\t" /* jmp_buf->Xmm8 */
"movdqa %xmm9,0x90(%rcx)\n\t" /* jmp_buf->Xmm9 */
"movdqa %xmm10,0xa0(%rcx)\n\t" /* jmp_buf->Xmm10 */
"movdqa %xmm11,0xb0(%rcx)\n\t" /* jmp_buf->Xmm11 */
"movdqa %xmm12,0xc0(%rcx)\n\t" /* jmp_buf->Xmm12 */
"movdqa %xmm13,0xd0(%rcx)\n\t" /* jmp_buf->Xmm13 */
"movdqa %xmm14,0xe0(%rcx)\n\t" /* jmp_buf->Xmm14 */
"movdqa %xmm15,0xf0(%rcx)\n\t" /* jmp_buf->Xmm15 */
"movq %xmm6,0x60(%rcx)\n\t" /* jmp_buf->Xmm6 */
"movq %xmm7,0x70(%rcx)\n\t" /* jmp_buf->Xmm7 */
"movq %xmm8,0x80(%rcx)\n\t" /* jmp_buf->Xmm8 */
"movq %xmm9,0x90(%rcx)\n\t" /* jmp_buf->Xmm9 */
"movq %xmm10,0xa0(%rcx)\n\t" /* jmp_buf->Xmm10 */
"movq %xmm11,0xb0(%rcx)\n\t" /* jmp_buf->Xmm11 */
"movq %xmm12,0xc0(%rcx)\n\t" /* jmp_buf->Xmm12 */
"movq %xmm13,0xd0(%rcx)\n\t" /* jmp_buf->Xmm13 */
"movq %xmm14,0xe0(%rcx)\n\t" /* jmp_buf->Xmm14 */
"movq %xmm15,0xf0(%rcx)\n\t" /* jmp_buf->Xmm15 */
Won't this only save the lower 8 bytes?
@@ -103,16 +103,16 @@ __ASM_GLOBAL_FUNC( __wine_longjmp, "movq 0x38(%rcx),%r13\n\t" /* jmp_buf->R13 */ "movq 0x40(%rcx),%r14\n\t" /* jmp_buf->R14 */ "movq 0x48(%rcx),%r15\n\t" /* jmp_buf->R15 */
"movdqa 0x60(%rcx),%xmm6\n\t" /* jmp_buf->Xmm6 */
"movdqa 0x70(%rcx),%xmm7\n\t" /* jmp_buf->Xmm7 */
"movdqa 0x80(%rcx),%xmm8\n\t" /* jmp_buf->Xmm8 */
"movdqa 0x90(%rcx),%xmm9\n\t" /* jmp_buf->Xmm9 */
"movdqa 0xa0(%rcx),%xmm10\n\t" /* jmp_buf->Xmm10 */
"movdqa 0xb0(%rcx),%xmm11\n\t" /* jmp_buf->Xmm11 */
"movdqa 0xc0(%rcx),%xmm12\n\t" /* jmp_buf->Xmm12 */
"movdqa 0xd0(%rcx),%xmm13\n\t" /* jmp_buf->Xmm13 */
"movdqa 0xe0(%rcx),%xmm14\n\t" /* jmp_buf->Xmm14 */
"movdqa 0xf0(%rcx),%xmm15\n\t" /* jmp_buf->Xmm15 */
"movq 0x60(%rcx),%xmm6\n\t" /* jmp_buf->Xmm6 */
"movq 0x70(%rcx),%xmm7\n\t" /* jmp_buf->Xmm7 */
"movq 0x80(%rcx),%xmm8\n\t" /* jmp_buf->Xmm8 */
"movq 0x90(%rcx),%xmm9\n\t" /* jmp_buf->Xmm9 */
"movq 0xa0(%rcx),%xmm10\n\t" /* jmp_buf->Xmm10 */
"movq 0xb0(%rcx),%xmm11\n\t" /* jmp_buf->Xmm11 */
"movq 0xc0(%rcx),%xmm12\n\t" /* jmp_buf->Xmm12 */
"movq 0xd0(%rcx),%xmm13\n\t" /* jmp_buf->Xmm13 */
"movq 0xe0(%rcx),%xmm14\n\t" /* jmp_buf->Xmm14 */
"movq 0xf0(%rcx),%xmm15\n\t" /* jmp_buf->Xmm15 */
And this only restores the lower 8 bytes.
Chip