On 22.01.2016 23:24, Paul Gofman wrote:
Signed-off-by: Paul Gofman gofmanp@gmail.com
dlls/mscoree/corruntimehost.c | 91 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 12 deletions(-)
diff --git a/dlls/mscoree/corruntimehost.c b/dlls/mscoree/corruntimehost.c index e437cc2..05ac9c9 100644 --- a/dlls/mscoree/corruntimehost.c +++ b/dlls/mscoree/corruntimehost.c @@ -919,7 +919,68 @@ static const struct vtable_fixup_thunk thunk_template = {
#include "poppack.h"
-#else /* !defined(__i386__) */ +#elif __x86_64__ /* !__i386__ */
+# define CAN_FIXUP_VTABLE 1
+#include "pshpack1.h"
+struct vtable_fixup_thunk +{
- /* push %rcx; push %rdx; push %r8; push %r9 */
I think it would be better to allocate a proper stack frame (pushq %rbp, movq %rsp, %rbp, subq ...,%rsp) and then use movq instructions.
- BYTE i1[6];
- /* sub $0x68, %rsp ; 0x10*4 + 0x20 + 0x8 (align stack for subsequent func call)
movups %xmm0,0x28(%esp); ...; movups %xmm5,0x58(%esp)
Shouldn't it be %rsp ? Same for the precompiled assembler code below. Also, its theoretically safe to use movdqa like done in various other wine functions.
ReallyFixupVTable will crash if we are called with unaligned stack
There is no need to add a comment for that, its expected to crash when misaligning the stack.
- */
- BYTE i2[28];
- /* mov function,%eax */
- BYTE i3[2];
- void (CDECL *function)(struct dll_fixup *);
- /* mov fixup,%rcx */
- BYTE i4[2];
- struct dll_fixup *fixup;
- /* call *%eax */
- BYTE i5[2];
- /*
movups 0x28(%esp),xmm0; ...; movups 0x58(%esp),xmm3
add $0x68, %rsp
- */
- BYTE i6[28];
- /* pop %r9; pop %r8; pop %rdx; pop %rcx */
- BYTE i7[6];
- /* mov vtable_entry, %rax */
- BYTE i8[2];
- void *vtable_entry;
- /* mov [%rax],%rax
jmp %rax */
- BYTE i9[5];
+};
+static const struct vtable_fixup_thunk thunk_template = {
- {0x51,0x52,0x41,0x50,0x41,0x51},
- {0x48,0x83,0xEC,0x68,
0x67,0x0F,0x11,0x44,0x24,0x28, 0x67,0x0F,0x11,0x4C,0x24,0x38,
0x67,0x0F,0x11,0x54,0x24,0x48, 0x67,0x0F,0x11,0x5C,0x24,0x58,
- },
- {0x48,0xB8},
- NULL,
- {0x48,0xB9},
- NULL,
- {0xFF,0xD0},
- {0x67,0x0F,0x10,0x44,0x24,0x28, 0x67,0x0F,0x10,0x4C,0x24,0x38,
0x67,0x0F,0x10,0x54,0x24,0x48, 0x67,0x0F,0x10,0x5C,0x24,0x58,
0x48,0x83,0xC4,0x68},
- {0x41,0x59,0x41,0x58,0x5A,0x59},
- {0x48,0xB8},
- NULL,
- {0x48,0x8B,0x00,0xFF,0xE0}
+};
+#include "poppack.h"
+#else /* !__i386__ && !__x86_64__ */
# define CAN_FIXUP_VTABLE 0
@@ -982,15 +1043,19 @@ static void CDECL ReallyFixupVTable(struct dll_fixup *fixup) /* Mono needs an image that belongs to an assembly. */ image = mono_assembly_get_image(assembly);
+#if __x86_64__
if (fixup->fixup->type & COR_VTABLE_64BIT)
+#else if (fixup->fixup->type & COR_VTABLE_32BIT) +#endif {
DWORD *vtable = fixup->vtable;
DWORD *tokens = fixup->tokens;
void **vtable = fixup->vtable;
ULONG_PTR *tokens = fixup->tokens; for (i=0; i<fixup->fixup->count; i++) {
TRACE("%x\n", tokens[i]);
vtable[i] = PtrToUint(mono_marshal_get_vtfixup_ftnptr(
image, tokens[i], fixup->fixup->type));
TRACE("%#lx\n", tokens[i]);
vtable[i] = mono_marshal_get_vtfixup_ftnptr(
image, tokens[i], fixup->fixup->type);
I would suggest to move those changes and some of the changes below into a separate patch. They can easily be reviewed separately and using PtrToUint() instead of pointer types wasn't even a good idea on 32-bit. ;)
} }
@@ -1029,16 +1094,18 @@ static void FixupVTableEntry(HMODULE hmodule, VTableFixup *vtable_fixup) fixup->vtable = (BYTE*)hmodule + vtable_fixup->rva; fixup->done = FALSE;
- TRACE("vtable_fixup->type=0x%x\n",vtable_fixup->type);
+#if __x86_64__
- if (vtable_fixup->type & COR_VTABLE_64BIT)
+#else if (vtable_fixup->type & COR_VTABLE_32BIT) +#endif {
DWORD *vtable = fixup->vtable;
DWORD *tokens;
void **vtable = fixup->vtable;
ULONG_PTR *tokens; int i; struct vtable_fixup_thunk *thunks = fixup->thunk_code;
if (sizeof(void*) > 4)
ERR("32-bit fixup in 64-bit mode; broken image?\n");
tokens = fixup->tokens = HeapAlloc(GetProcessHeap(), 0, sizeof(*tokens) * vtable_fixup->count); memcpy(tokens, vtable, sizeof(*tokens) * vtable_fixup->count); for (i=0; i<vtable_fixup->count; i++)
@@ -1047,7 +1114,7 @@ static void FixupVTableEntry(HMODULE hmodule, VTableFixup *vtable_fixup) thunks[i].fixup = fixup; thunks[i].function = ReallyFixupVTable; thunks[i].vtable_entry = &vtable[i];
vtable[i] = PtrToUint(&thunks[i]);
} elsevtable[i] = &thunks[i]; }
I would suggest to move those changes and some of the changes below into a separate patch. They can easily be reviewed separately and using PtrToUint() instead of pointer types wasn't even a good idea on 32-bit. ;)
The original idea was that we'd have a separate loop for vtables with the COR_VTABLE_64BIT flag, and on 32-bit we'd support both flags. Making the flag checked arch-specific means we can use a pointer type, but it also means we can't handle a 64-bit vtable fixup in 32-bit images (which native may not even allow, I haven't checked).
We really should've had some sort of "unsupported flags" check in there, but I didn't want to bring up pre-existing issues.
The reason we didn't allocate a proper stack frame on x86 is that I didn't realize it was an option.
On 01/23/2016 04:52 AM, Sebastian Lackner wrote:
On 22.01.2016 23:24, Paul Gofman wrote:
- /* push %rcx; push %rdx; push %r8; push %r9 */
I think it would be better to allocate a proper stack frame (pushq %rbp, movq %rsp, %rbp, subq ...,%rsp) and then use movq instructions.
I will change pushq to movq (copying regs over esp the same way as for XMM). But why do you prefer to have a stack frame ('pushq %ebp; movq %esp, %ebp')? I probably missing something but I do not see why it will make anything better. Even without it I can see the thunk in a call stack unwind on crash from the inside.
- BYTE i1[6];
- /* sub $0x68, %rsp ; 0x10*4 + 0x20 + 0x8 (align stack for subsequent func call)
movups %xmm0,0x28(%esp); ...; movups %xmm5,0x58(%esp)
Shouldn't it be %rsp ? Same for the precompiled assembler code below.
Shame on me, I shuffled around and stared into these bytes 100 times and did not notice. Thank you for catching this and sorry you had to do it.
Also, its theoretically safe to use movdqa like done in various other wine functions.
I could use aligned move (additionally taking care for moves alignment), but I thought it would be just easier to debug if on initially unaligned stack call it will crash in ReallyFixupVTable (or even not crash if it will be aligning stack sometime in future). I can make aligned moves if you think it is better, but we are really not gaining a performance from it, it is not something that called often, but complicating logic a bit requiring both aligned moves in a thunk and aligned stack on call to ReallyFixupVTable. If to move aligned, do you have any specific prefence to movdqa over movaps? I personally prefer movaps as it is what all the compilers do nowadays (and mnemonic looks nicer) but of course can change to movdqa.
image, tokens[i], fixup->fixup->type);
I would suggest to move those changes and some of the changes below into a separate patch. They can easily be reviewed separately and using PtrToUint() instead of pointer types wasn't even a good idea on 32-bit. ;)
I can move 2 vars type changes, removing PtrToUint and TRACE away into separate (but linked) patch.
On 23.01.2016 15:59, Paul Gofman wrote:
On 01/23/2016 04:52 AM, Sebastian Lackner wrote:
On 22.01.2016 23:24, Paul Gofman wrote:
- /* push %rcx; push %rdx; push %r8; push %r9 */
I think it would be better to allocate a proper stack frame (pushq %rbp, movq %rsp, %rbp, subq ...,%rsp) and then use movq instructions.
I will change pushq to movq (copying regs over esp the same way as for XMM). But why do you prefer to have a stack frame ('pushq %ebp; movq %esp, %ebp')? I probably missing something but I do not see why it will make anything better. Even without it I can see the thunk in a call stack unwind on crash from the inside.
In theory stack unwinding should not work correct with both solutions (yet), The main reason I suggested it was that its probably faster, and that we could implement stack alignment fixup easier later if it turns out to be necessary.
- BYTE i1[6];
- /* sub $0x68, %rsp ; 0x10*4 + 0x20 + 0x8 (align stack for subsequent func call)
movups %xmm0,0x28(%esp); ...; movups %xmm5,0x58(%esp)
Shouldn't it be %rsp ? Same for the precompiled assembler code below.
Shame on me, I shuffled around and stared into these bytes 100 times and did not notice. Thank you for catching this and sorry you had to do it.
No problem, thats what the review is for ;)
Also, its theoretically safe to use movdqa like done in various other wine functions.
I could use aligned move (additionally taking care for moves alignment), but I thought it would be just easier to debug if on initially unaligned stack call it will crash in ReallyFixupVTable (or even not crash if it will be aligning stack sometime in future). I can make aligned moves if you think it is better, but we are really not gaining a performance from it, it is not something that called often, but complicating logic a bit requiring both aligned moves in a thunk and aligned stack on call to ReallyFixupVTable.
If the function is called with wrong alignment, it would only bring us a tiny bit further. The compiler feature for automatic x64 stack alignment fixup is still pretty new, so we would have to implement our own version in assembler anyway.
If to move aligned, do you have any specific prefence to movdqa over movaps? I personally prefer movaps as it is what all the compilers do nowadays (and mnemonic looks nicer) but of course can change to movdqa.
I do not really have a preference, both opcodes are used in the Wine source.
image, tokens[i], fixup->fixup->type);
I would suggest to move those changes and some of the changes below into a separate patch. They can easily be reviewed separately and using PtrToUint() instead of pointer types wasn't even a good idea on 32-bit. ;)
I can move 2 vars type changes, removing PtrToUint and TRACE away into separate (but linked) patch.