Since they're asm functions they conflict when building with LTO (among other things). This is normally hidden when separating translation units, but it's still not correct.
From: Gabriel Ivăncescu gabrielopcode@gmail.com
It conflicts with __chkstk from kernelbase.
Signed-off-by: Gabriel Ivăncescu gabrielopcode@gmail.com --- dlls/ntoskrnl.exe/ntoskrnl.c | 8 ++++---- dlls/ntoskrnl.exe/ntoskrnl.exe.spec | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/dlls/ntoskrnl.exe/ntoskrnl.c b/dlls/ntoskrnl.exe/ntoskrnl.c index 45b8e1ab1c1..993b98426f3 100644 --- a/dlls/ntoskrnl.exe/ntoskrnl.c +++ b/dlls/ntoskrnl.exe/ntoskrnl.c @@ -4148,9 +4148,9 @@ NTSTATUS WINAPI IoCreateFile(HANDLE *handle, ACCESS_MASK access, OBJECT_ATTRIBUT */ #ifdef __x86_64__ /* Supposed to touch all the stack pages, but we shouldn't need that. */ -__ASM_GLOBAL_FUNC( __chkstk, "ret" ); +__ASM_GLOBAL_FUNC( NTOSKRNL___chkstk, "ret" ); #elif defined(__i386__) -__ASM_GLOBAL_FUNC( _chkstk, +__ASM_GLOBAL_FUNC( NTOSKRNL__chkstk, "negl %eax\n\t" "addl %esp,%eax\n\t" "xchgl %esp,%eax\n\t" @@ -4159,11 +4159,11 @@ __ASM_GLOBAL_FUNC( _chkstk, "ret" ) #elif defined(__arm__) /* Incoming r4 contains words to allocate, converting to bytes then return */ -__ASM_GLOBAL_FUNC( __chkstk, "lsl r4, r4, #2\n\t" +__ASM_GLOBAL_FUNC( NTOSKRNL___chkstk, "lsl r4, r4, #2\n\t" "bx lr" ) #elif defined(__aarch64__) /* Supposed to touch all the stack pages, but we shouldn't need that. */ -__ASM_GLOBAL_FUNC( __chkstk, "ret" ); +__ASM_GLOBAL_FUNC( NTOSKRNL___chkstk, "ret" ); #endif
/********************************************************************* diff --git a/dlls/ntoskrnl.exe/ntoskrnl.exe.spec b/dlls/ntoskrnl.exe/ntoskrnl.exe.spec index aa7831eac5f..dbdc83dee2a 100644 --- a/dlls/ntoskrnl.exe/ntoskrnl.exe.spec +++ b/dlls/ntoskrnl.exe/ntoskrnl.exe.spec @@ -1543,7 +1543,7 @@ @ stdcall ZwWriteFile(long long ptr ptr ptr ptr long ptr ptr) NtWriteFile @ stdcall -private ZwYieldExecution() NtYieldExecution @ stdcall -arch=!i386 __C_specific_handler(ptr long ptr ptr) -@ cdecl -arch=!i386 -norelay __chkstk() +@ cdecl -arch=!i386 -norelay __chkstk() NTOSKRNL___chkstk @ cdecl -private -arch=i386 _CIcos() @ cdecl -private -arch=i386 _CIsin() @ cdecl -private -arch=i386 _CIsqrt() @@ -1559,7 +1559,7 @@ @ cdecl -arch=i386 -norelay _aulldvrm(int64 int64) @ cdecl -arch=i386 -norelay -ret64 _aullrem(int64 int64) @ cdecl -arch=i386 -norelay -ret64 _aullshr(int64 long) -@ cdecl -arch=i386 -norelay _chkstk() +@ cdecl -arch=i386 -norelay _chkstk() NTOSKRNL__chkstk @ cdecl -arch=i386 _except_handler2(ptr ptr ptr ptr) @ cdecl -arch=i386 _except_handler3(ptr ptr ptr ptr) @ cdecl _finite(double)
From: Gabriel Ivăncescu gabrielopcode@gmail.com
Signed-off-by: Gabriel Ivăncescu gabrielopcode@gmail.com --- dlls/ntoskrnl.exe/ntoskrnl.c | 6 +++--- dlls/ntoskrnl.exe/ntoskrnl.exe.spec | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/dlls/ntoskrnl.exe/ntoskrnl.c b/dlls/ntoskrnl.exe/ntoskrnl.c index 993b98426f3..722616fca9d 100644 --- a/dlls/ntoskrnl.exe/ntoskrnl.c +++ b/dlls/ntoskrnl.exe/ntoskrnl.c @@ -2187,7 +2187,7 @@ LONG FASTCALL NTOSKRNL_InterlockedIncrement( LONG volatile *dest ) /************************************************************************* * RtlUshortByteSwap (NTOSKRNL.EXE.@) */ -__ASM_FASTCALL_FUNC(RtlUshortByteSwap, 4, +__ASM_FASTCALL_FUNC(NTOSKRNL_RtlUshortByteSwap, 4, "movb %ch,%al\n\t" "movb %cl,%ah\n\t" "ret") @@ -2195,7 +2195,7 @@ __ASM_FASTCALL_FUNC(RtlUshortByteSwap, 4, /************************************************************************* * RtlUlongByteSwap (NTOSKRNL.EXE.@) */ -__ASM_FASTCALL_FUNC(RtlUlongByteSwap, 4, +__ASM_FASTCALL_FUNC(NTOSKRNL_RtlUlongByteSwap, 4, "movl %ecx,%eax\n\t" "bswap %eax\n\t" "ret") @@ -2203,7 +2203,7 @@ __ASM_FASTCALL_FUNC(RtlUlongByteSwap, 4, /************************************************************************* * RtlUlonglongByteSwap (NTOSKRNL.EXE.@) */ -__ASM_FASTCALL_FUNC(RtlUlonglongByteSwap, 8, +__ASM_FASTCALL_FUNC(NTOSKRNL_RtlUlonglongByteSwap, 8, "movl 4(%esp),%edx\n\t" "bswap %edx\n\t" "movl 8(%esp),%eax\n\t" diff --git a/dlls/ntoskrnl.exe/ntoskrnl.exe.spec b/dlls/ntoskrnl.exe/ntoskrnl.exe.spec index dbdc83dee2a..b2d8f78f083 100644 --- a/dlls/ntoskrnl.exe/ntoskrnl.exe.spec +++ b/dlls/ntoskrnl.exe/ntoskrnl.exe.spec @@ -69,9 +69,9 @@ @ stdcall -fastcall ObfDereferenceObject(ptr) @ stdcall -fastcall ObfReferenceObject(ptr) @ stub RtlPrefetchMemoryNonTemporal -@ stdcall -fastcall -arch=i386 -norelay RtlUlongByteSwap(long) -@ stdcall -fastcall -arch=i386 -norelay RtlUlonglongByteSwap(int64) -@ stdcall -fastcall -arch=i386 -norelay RtlUshortByteSwap(long) +@ stdcall -fastcall -arch=i386 -norelay RtlUlongByteSwap(long) NTOSKRNL_RtlUlongByteSwap +@ stdcall -fastcall -arch=i386 -norelay RtlUlonglongByteSwap(int64) NTOSKRNL_RtlUlonglongByteSwap +@ stdcall -fastcall -arch=i386 -norelay RtlUshortByteSwap(long) NTOSKRNL_RtlUshortByteSwap @ stub WmiGetClock @ stub Kei386EoiHelper @ stub Kii386SpinOnSpinLock
Since they're asm functions they conflict when building with LTO (among other things). This is normally hidden when separating translation units, but it's still not correct.
I don't see what's not correct about it, the symbols won't be imported if they are already defined.
On Fri Jan 19 20:32:14 2024 +0000, Alexandre Julliard wrote:
Since they're asm functions they conflict when building with LTO
(among other things). This is normally hidden when separating translation units, but it's still not correct. I don't see what's not correct about it, the symbols won't be imported if they are already defined.
Well, it fails to build with LTO, because they're asm names. Doesn't kernelbase.a for instance (or any import library) define them as well with asm wrappers? I'm not 100% sure why it fails but it does.
I can give you a hack patch to enable LTO build if you want. Obviously it can't go upstream in that shape but this is only part of it.
Well, it fails to build with LTO, because they're asm names. Doesn't kernelbase.a for instance (or any import library) define them as well with asm wrappers? I'm not 100% sure why it fails but it does.
It sounds like it needs more investigation then. There should be nothing wrong with defining a function that also exists in an import library.
On Fri Jan 19 20:40:51 2024 +0000, Alexandre Julliard wrote:
Well, it fails to build with LTO, because they're asm names. Doesn't
kernelbase.a for instance (or any import library) define them as well with asm wrappers? I'm not 100% sure why it fails but it does. It sounds like it needs more investigation then. There should be nothing wrong with defining a function that also exists in an import library.
Ok, I figured it out. First, wine build tools don't create LTO static archives properly (or rather, doesn't pass the arguments properly, or use the correct archiver, despite being set in environment variables). I will send a separate MR for this as it's pretty straightforward I think.
Now the bigger issue are functions defined with `ASM_GLOBAL_FUNC`. In short, GCC doesn't "see" inline asms, it doesn't parse them, so it doesn't know a function is being defined here. But LTO has to know—since it pulls in dependencies during early optimization stages, not after assembly. For example, it has to know if it can inline an imported function from a static library, etc.
So since LTO doesn't "see" the function being defined with `ASM_GLOBAL_FUNC`, it will pull it from the import library (libkernelbase.a). Later, when it's all assembled and a "normal" link is done (not one with LTO), multiple definitions happen since now the ASM_GLOBAL_FUNC is pasted there as-is, while the imported one is also there.
A similar story happens with `winecrt0/setjmp.o` build with LTO. This file has some ASM_GLOBAL_FUNCs. When a module uses them, and tries to import it using winecrt0.a, LTO doesn't "see" them, so it never imports setjmp.o, causing undefined references. If you import setjmp.o directly, it works of course.
One way to tackle this is by making such functions proper functions with the `naked` attribute. I know that attribute is not available in older GCC versions for i386/x86_64, so here's my suggestion:
We define ASM_GLOBAL_FUNC to make use of attribute naked if it's available (GCC version check? or feature check on configure? please let me know which one you prefer). But, that does mean we need to include the function signature (return type and arg types) in the macro, and thus all the uses of it have to include this. I don't think this is a bad idea in itself, since it's like defining a function, can even be a self-documenting in a way.
What do you think? Is that acceptable?
On Mon Jan 22 13:35:37 2024 +0000, Gabriel Ivăncescu wrote:
Ok, I figured it out. First, wine build tools don't create LTO static archives properly (or rather, doesn't pass the arguments properly, or use the correct archiver, despite being set in environment variables). I will send a separate MR for this as it's pretty straightforward I think. Now the bigger issue are functions defined with `ASM_GLOBAL_FUNC`. In short, GCC doesn't "see" inline asms, it doesn't parse them, so it doesn't know a function is being defined here. But LTO has to know—since it pulls in dependencies during early optimization stages, not after assembly. For example, it has to know if it can inline an imported function from a static library, etc. So since LTO doesn't "see" the function being defined with `ASM_GLOBAL_FUNC`, it will pull it from the import library (libkernelbase.a). Later, when it's all assembled and a "normal" link is done (not one with LTO), multiple definitions happen since now the ASM_GLOBAL_FUNC is pasted there as-is, while the imported one is also there. A similar story happens with `winecrt0/setjmp.o` build with LTO. This file has some ASM_GLOBAL_FUNCs. When a module uses them, and tries to import it using winecrt0.a, LTO doesn't "see" them, so it never imports setjmp.o, causing undefined references. If you import setjmp.o directly, it works of course. One way to tackle this is by making such functions proper functions with the `naked` attribute. I know that attribute is not available in older GCC versions for i386/x86_64, so here's my suggestion: We define ASM_GLOBAL_FUNC to make use of attribute naked if it's available (GCC version check? or feature check on configure? please let me know which one you prefer). But, that does mean we need to include the function signature (return type and arg types) in the macro, and thus all the uses of it have to include this. I don't think this is a bad idea in itself, since it's like defining a function, can even be a self-documenting in a way. What do you think? Is that acceptable?
Unfortunately naked functions don't have well-defined semantics, I don't think we can use them.
On Mon Jan 22 15:35:13 2024 +0000, Alexandre Julliard wrote:
Unfortunately naked functions don't have well-defined semantics, I don't think we can use them.
They seem to be well defined nowadays, on the x86 page:
https://gcc.gnu.org/onlinedocs/gcc/x86-Function-Attributes.html
They're there since [GCC 8 apparently](https://gcc.gnu.org/onlinedocs/gcc-8.1.0/gcc/x86-Function-Attributes.html), but I'm not sure if they were reliable back then or not. We can use a version check to guard them in such case?
And of course we already use basic asm so that shouldn't be an issue right?
On Mon Jan 22 15:39:52 2024 +0000, Gabriel Ivăncescu wrote:
They seem to be well defined nowadays, on the x86 page: https://gcc.gnu.org/onlinedocs/gcc/x86-Function-Attributes.html They're there since [GCC 8 apparently](https://gcc.gnu.org/onlinedocs/gcc-8.1.0/gcc/x86-Function-Attributes.html), but I'm not sure if they were reliable back then or not. We can use a version check to guard them in such case? And of course we already use basic asm so that shouldn't be an issue right?
No, for instance on gcc they contain a .seh_endprologue which interferes with SEH handling.
On Mon Jan 22 15:46:26 2024 +0000, Alexandre Julliard wrote:
No, for instance on gcc they contain a .seh_endprologue which interferes with SEH handling.
I see, that's a bit of a bummer. Is there no way to workaround it (that's acceptable without hacks like using a macro for .seh_endprologue, obviously)? Or ideas to try something else? Or give up on trying to upstream it?
On Mon Jan 22 16:10:58 2024 +0000, Gabriel Ivăncescu wrote:
I see, that's a bit of a bummer. Is there no way to workaround it (that's acceptable without hacks like using a macro for .seh_endprologue, obviously)? Or ideas to try something else? Or give up on trying to upstream it?
As long as it requires a non-PE build, I don't think that LTO is interesting. In a PE build presumably it could be made to handle import libraries correctly.
On Mon Jan 22 16:25:50 2024 +0000, Alexandre Julliard wrote:
As long as it requires a non-PE build, I don't think that LTO is interesting. In a PE build presumably it could be made to handle import libraries correctly.
FWIW I believe the only three modules that have issues with this are:
* kernel32 (ExitProcess and for i386 the Interlocked* funcs) * ntoskrnl (this MR) * winecrt0 (setjmp.c but that can be compiled without LTO as a workaround, though I'm not sure how hacky that would be)
Since I've been able to get a successful build with some hacks for them. Of course some functions in other modules (ntdll) may need to be marked with `__attribute__((used))` because they're called only from asm funcs, but that's completely safe and a no-op normally so it's not a concern.
From what I see those 3 modules are all PE, right? Just curious what exactly do you mean by "handle import libraries correctly", can you please elaborate a bit?
On Mon Jan 22 17:10:07 2024 +0000, Gabriel Ivăncescu wrote:
FWIW I believe the only three modules that have issues with this are:
- kernel32 (ExitProcess and for i386 the Interlocked* funcs)
- ntoskrnl (this MR)
- winecrt0 (setjmp.c but that can be compiled without LTO as a
workaround, though I'm not sure how hacky that would be) Since I've been able to get a successful build with some hacks for them. Of course some functions in other modules (ntdll) may need to be marked with `__attribute__((used))` because they're called only from asm funcs, but that's completely safe and a no-op normally so it's not a concern. From what I see those 3 modules are all PE, right? Just curious what exactly do you mean by "handle import libraries correctly", can you please elaborate a bit?
Import libraries are not regular static libs, they don't contain real code, so it wouldn't make sense to have them participate in LTO.
On Mon Jan 22 18:00:34 2024 +0000, Alexandre Julliard wrote:
Import libraries are not regular static libs, they don't contain real code, so it wouldn't make sense to have them participate in LTO.
Unfortunately it doesn't seem like they're built with LTO to begin with (and even if I disable it, it's the same). They still contain just jump stubs and actual code even without `-ffat-lto-objects`, so they're already without LTO from what I can see (and still fails, for whatever reason). "Actual code" means actual compiled object files, because LTO stores only the internal IR without fat mode. Maybe LTO still tries to resolve/import them even if they don't contain LTO IR bytecode…
I did find a somewhat hacky solution using the `no_reorder` attribute. Per the documentation it guarantees the function is not reordered between top level asms, so we can do some assembler magic:
```c __ASM_GLOBAL_FUNC( _chkstk, /* same asm as normal */ ) asm(".if 0"); void __attribute__((__no_reorder__,__naked__)) _chkstk(void) { asm(""); } asm(".endif"); ```
Note the `naked` attribute with the empty asm, which tells LTO that it can't know the contents of the function (so it can't optimize anything which would be wrong). And the `no_reorder` which guarantees the function is placed between the top level asm statements. And of course this could be integrated into the macro.
It works, but I'm guessing this is too much of a hack for upstream?
It works, but I'm guessing this is too much of a hack for upstream?
Yes it's quite ugly...
This merge request was closed by Gabriel Ivăncescu.
I'm going to close this until I find an acceptable alternative, probably move them to a separate file and build that without LTO somehow, maybe with a pragma?