[PATCH 0/3] MR10119: winegcc: Use compiler-rt instead of -lgcc on mingw targets.
This treats mingw targets the same way as msvc. We already have the necessary infrastructure; we are just missing a few functions to be able to use it for mingw as well. In my case, I was looking at adding emutls support for GCC. Since it is typically provided by libgcc or compiler-rt, this should be straightforward to implement on top of this MR. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/10119
From: Jacek Caban <jacek@codeweavers.com> --- libs/compiler-rt/Makefile.in | 9 +- libs/compiler-rt/lib/builtins/i386/chkstk.S | 34 ++++ libs/compiler-rt/lib/builtins/i386/chkstk2.S | 40 +++++ libs/compiler-rt/lib/builtins/i386/moddi3.S | 169 ++++++++++++++++++ libs/compiler-rt/lib/builtins/popcountdi2.c | 36 ++++ libs/compiler-rt/lib/builtins/popcountsi2.c | 33 ++++ libs/compiler-rt/lib/builtins/x86_64/chkstk.S | 39 ++++ .../compiler-rt/lib/builtins/x86_64/chkstk2.S | 42 +++++ 8 files changed, 401 insertions(+), 1 deletion(-) create mode 100644 libs/compiler-rt/lib/builtins/i386/chkstk.S create mode 100644 libs/compiler-rt/lib/builtins/i386/chkstk2.S create mode 100644 libs/compiler-rt/lib/builtins/i386/moddi3.S create mode 100644 libs/compiler-rt/lib/builtins/popcountdi2.c create mode 100644 libs/compiler-rt/lib/builtins/popcountsi2.c create mode 100644 libs/compiler-rt/lib/builtins/x86_64/chkstk.S create mode 100644 libs/compiler-rt/lib/builtins/x86_64/chkstk2.S diff --git a/libs/compiler-rt/Makefile.in b/libs/compiler-rt/Makefile.in index 11be92d1dbf..fba197ccea1 100644 --- a/libs/compiler-rt/Makefile.in +++ b/libs/compiler-rt/Makefile.in @@ -20,7 +20,12 @@ SOURCES = \ lib/builtins/floatdisf.c \ lib/builtins/floatundidf.c \ lib/builtins/floatundisf.c \ + lib/builtins/i386/chkstk.S \ + lib/builtins/i386/chkstk2.S \ + lib/builtins/i386/moddi3.S \ lib/builtins/mingw_fixfloat.c \ + lib/builtins/popcountdi2.c \ + lib/builtins/popcountsi2.c \ lib/builtins/udivdi3.c \ lib/builtins/udivmoddi4.c \ lib/builtins/udivmodsi4.c \ @@ -29,6 +34,8 @@ SOURCES = \ lib/builtins/udivti3.c \ lib/builtins/umoddi3.c \ lib/builtins/umodsi3.c \ - lib/builtins/umodti3.c + lib/builtins/umodti3.c \ + lib/builtins/x86_64/chkstk.S \ + lib/builtins/x86_64/chkstk2.S INSTALL_DEV = $(EXTLIB) diff --git a/libs/compiler-rt/lib/builtins/i386/chkstk.S b/libs/compiler-rt/lib/builtins/i386/chkstk.S new file mode 100644 index 00000000000..b59974868f2 --- /dev/null +++ 
b/libs/compiler-rt/lib/builtins/i386/chkstk.S @@ -0,0 +1,34 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// _chkstk routine +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__chkstk_ms) + push %ecx + push %eax + cmp $0x1000,%eax + lea 12(%esp),%ecx + jb 1f +2: + sub $0x1000,%ecx + test %ecx,(%ecx) + sub $0x1000,%eax + cmp $0x1000,%eax + ja 2b +1: + sub %eax,%ecx + test %ecx,(%ecx) + pop %eax + pop %ecx + ret +END_COMPILERRT_FUNCTION(__chkstk_ms) + +#endif // __i386__ diff --git a/libs/compiler-rt/lib/builtins/i386/chkstk2.S b/libs/compiler-rt/lib/builtins/i386/chkstk2.S new file mode 100644 index 00000000000..7d65bb08892 --- /dev/null +++ b/libs/compiler-rt/lib/builtins/i386/chkstk2.S @@ -0,0 +1,40 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +#ifdef __i386__ + +// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments, +// then decrement %esp by %eax. Preserves all registers except %esp and flags. 
+// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function +DEFINE_COMPILERRT_FUNCTION(__chkstk) + push %ecx + cmp $0x1000,%eax + lea 8(%esp),%ecx // esp before calling this routine -> ecx + jb 1f +2: + sub $0x1000,%ecx + test %ecx,(%ecx) + sub $0x1000,%eax + cmp $0x1000,%eax + ja 2b +1: + sub %eax,%ecx + test %ecx,(%ecx) + + lea 4(%esp),%eax // load pointer to the return address into eax + mov %ecx,%esp // install the new top of stack pointer into esp + mov -4(%eax),%ecx // restore ecx + push (%eax) // push return address onto the stack + sub %esp,%eax // restore the original value in eax + ret +END_COMPILERRT_FUNCTION(__chkstk) +END_COMPILERRT_FUNCTION(_alloca) + +#endif // __i386__ diff --git a/libs/compiler-rt/lib/builtins/i386/moddi3.S b/libs/compiler-rt/lib/builtins/i386/moddi3.S new file mode 100644 index 00000000000..a5bf9ce8ea0 --- /dev/null +++ b/libs/compiler-rt/lib/builtins/i386/moddi3.S @@ -0,0 +1,169 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __moddi3(di_int a, di_int b); + +// result = remainder of a / b. +// both inputs and the output are 64-bit signed integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// + +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__moddi3) + +/* This is currently implemented by wrapping the unsigned modulus up in an absolute + value. 
This could certainly be improved upon. */ + + pushl %esi + movl 20(%esp), %edx // high word of b + movl 16(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (b < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(b) + movl %edx, 20(%esp) + movl %eax, 16(%esp) // store abs(b) back to stack + + movl 12(%esp), %edx // high word of b + movl 8(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (a < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(a) + movl %edx, 12(%esp) + movl %eax, 8(%esp) // store abs(a) back to stack + movl %ecx, %esi // set aside sign of a + + pushl %ebx + movl 24(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 16(%esp), %edx // Load the high and low words of a, and jump + movl 12(%esp), %eax // to [2] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 
+ jae 2f + + /* High word of a is less than (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 1f // if positive, this is the result. + addl 24(%esp), %ebx // otherwise + adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +1: movl %ebx, %eax + movl %ecx, %edx + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + +2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 3f // if positive, this is the result. 
+ addl 24(%esp), %ebx // otherwise + adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +3: movl %ebx, %eax + movl %ecx, %edx + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + +9: /* High word of b is zero on this branch */ + + movl 16(%esp), %eax // Find qhi and rhi such that + movl 20(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 12(%esp), %eax // Find rlo such that + divl %ecx // + movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + xorl %edx, %edx // and return 0:rlo + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %esi + retl // Return +END_COMPILERRT_FUNCTION(__moddi3) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/libs/compiler-rt/lib/builtins/popcountdi2.c b/libs/compiler-rt/lib/builtins/popcountdi2.c new file mode 100644 index 00000000000..5e8a62f075e --- /dev/null +++ b/libs/compiler-rt/lib/builtins/popcountdi2.c @@ -0,0 +1,36 @@ +/* ===-- popcountdi2.c - Implement __popcountdi2 ----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountdi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: count of 1 bits */ + +COMPILER_RT_ABI si_int +__popcountdi2(di_int a) +{ + du_int x2 = (du_int)a; + x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); + /* Every 2 bits holds the sum of every pair of bits (32) */ + x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */ + x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */ + su_int x = (su_int)(x2 + (x2 >> 32)); + /* The lower 32 bits hold four 16 bit sums (5 significant bits). */ + /* Upper 32 bits are garbage */ + x = x + (x >> 16); + /* The lower 16 bits hold two 32 bit sums (6 significant bits). */ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */ +} diff --git a/libs/compiler-rt/lib/builtins/popcountsi2.c b/libs/compiler-rt/lib/builtins/popcountsi2.c new file mode 100644 index 00000000000..44544ff4989 --- /dev/null +++ b/libs/compiler-rt/lib/builtins/popcountsi2.c @@ -0,0 +1,33 @@ +/* ===-- popcountsi2.c - Implement __popcountsi2 ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountsi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: count of 1 bits */ + +COMPILER_RT_ABI si_int +__popcountsi2(si_int a) +{ + su_int x = (su_int)a; + x = x - ((x >> 1) & 0x55555555); + /* Every 2 bits holds the sum of every pair of bits */ + x = ((x >> 2) & 0x33333333) + (x & 0x33333333); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */ + x = (x + (x >> 4)) & 0x0F0F0F0F; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */ + x = (x + (x >> 16)); + /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */ +} diff --git a/libs/compiler-rt/lib/builtins/x86_64/chkstk.S b/libs/compiler-rt/lib/builtins/x86_64/chkstk.S new file mode 100644 index 00000000000..4149ac63d9d --- /dev/null +++ b/libs/compiler-rt/lib/builtins/x86_64/chkstk.S @@ -0,0 +1,39 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// _chkstk routine +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +// Notes from r227519 +// MSVC x64s __chkstk and cygmings ___chkstk_ms do not adjust %rsp +// themselves. It also does not clobber %rax so we can reuse it when +// adjusting %rsp. 
+ +#ifdef __x86_64__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(___chkstk_ms) + push %rcx + push %rax + cmp $0x1000,%rax + lea 24(%rsp),%rcx + jb 1f +2: + sub $0x1000,%rcx + test %rcx,(%rcx) + sub $0x1000,%rax + cmp $0x1000,%rax + ja 2b +1: + sub %rax,%rcx + test %rcx,(%rcx) + pop %rax + pop %rcx + ret +END_COMPILERRT_FUNCTION(___chkstk_ms) + +#endif // __x86_64__ diff --git a/libs/compiler-rt/lib/builtins/x86_64/chkstk2.S b/libs/compiler-rt/lib/builtins/x86_64/chkstk2.S new file mode 100644 index 00000000000..ac1eb920e0e --- /dev/null +++ b/libs/compiler-rt/lib/builtins/x86_64/chkstk2.S @@ -0,0 +1,42 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +#ifdef __x86_64__ + +// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments, +// then decrement %rsp by %rax. Preserves all registers except %rsp and flags. +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__alloca) + mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx + // fallthrough +DEFINE_COMPILERRT_FUNCTION(___chkstk) + push %rcx + cmp $0x1000,%rax + lea 16(%rsp),%rcx // rsp before calling this routine -> rcx + jb 1f +2: + sub $0x1000,%rcx + test %rcx,(%rcx) + sub $0x1000,%rax + cmp $0x1000,%rax + ja 2b +1: + sub %rax,%rcx + test %rcx,(%rcx) + + lea 8(%rsp),%rax // load pointer to the return address into rax + mov %rcx,%rsp // install the new top of stack pointer into rsp + mov -8(%rax),%rcx // restore rcx + push (%rax) // push return address onto the stack + sub %rsp,%rax // restore the original value in rax + ret +END_COMPILERRT_FUNCTION(___chkstk) +END_COMPILERRT_FUNCTION(__alloca) + +#endif // __x86_64__ -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10119
From: Jacek Caban <jacek@codeweavers.com> --- configure.ac | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 3137bc9a806..9874be868f0 100644 --- a/configure.ac +++ b/configure.ac @@ -540,9 +540,7 @@ This is an error since --enable-archs=$wine_arch was requested.])]) AS_VAR_IF([wine_arch],[$extra_arch],[],[AS_VAR_APPEND([PE_ARCHS],[" $wine_arch"])]) - case "$target" in - *-windows) wine_compiler_rt_libs="compiler-rt" ;; - esac + wine_compiler_rt_libs="compiler-rt" WINE_TRY_PE_CFLAGS([-fno-strict-aliasing]) dnl clang needs to be told to fail on unknown options -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10119
From: Jacek Caban <jacek@codeweavers.com> --- tools/winegcc/winegcc.c | 60 +---------------------------------------- 1 file changed, 1 insertion(+), 59 deletions(-) diff --git a/tools/winegcc/winegcc.c b/tools/winegcc/winegcc.c index 3ce6ed1ee4f..58ae9fbe7fb 100644 --- a/tools/winegcc/winegcc.c +++ b/tools/winegcc/winegcc.c @@ -1024,51 +1024,6 @@ static char *find_static_lib( const char *dll ) return NULL; } -static const char *find_libgcc(void) -{ - const char *out = make_temp_file( "find_libgcc", ".out" ); - const char *err = make_temp_file( "find_libgcc", ".err" ); - struct strarray link = get_translator(); - int sout = -1, serr = -1; - char *libgcc, *p; - struct stat st; - size_t cnt; - int ret; - - STRARRAY_FOR_EACH( arg, &linker_args ) - if (strcmp(arg, "--no-default-config" )) strarray_add( &link, arg ); - - strarray_add( &link, "-print-libgcc-file-name" ); - - sout = dup( fileno(stdout) ); - freopen( out, "w", stdout ); - serr = dup( fileno(stderr) ); - freopen( err, "w", stderr ); - ret = spawn( link, 1 ); - if (sout >= 0) - { - dup2( sout, fileno(stdout) ); - close( sout ); - } - if (serr >= 0) - { - dup2( serr, fileno(stderr) ); - close( serr ); - } - - if (ret || stat(out, &st) || !st.st_size) return NULL; - - libgcc = xmalloc(st.st_size + 1); - sout = open(out, O_RDONLY); - if (sout == -1) return NULL; - cnt = read(sout, libgcc, st.st_size); - close(sout); - libgcc[cnt] = 0; - if ((p = strchr(libgcc, '\n'))) *p = 0; - return libgcc; -} - - /* add specified library to the list of files */ static void add_library( struct strarray lib_dirs, struct strarray *files, const char *library ) { @@ -1329,8 +1284,7 @@ static void build(struct strarray input_files, const char *output) add_library(lib_dirs, &files, "advapi32"); add_library(lib_dirs, &files, "user32"); add_library(lib_dirs, &files, "winecrt0"); - if (target.platform == PLATFORM_WINDOWS) - add_library(lib_dirs, &files, "compiler-rt"); + if (is_pe) add_library(lib_dirs, &files, "compiler-rt"); if 
(use_msvcrt) { if (!crt_lib) @@ -1381,18 +1335,6 @@ static void build(struct strarray input_files, const char *output) /* link everything together now */ link_args = get_link_args( output_name ); - - switch (target.platform) - { - case PLATFORM_MINGW: - case PLATFORM_CYGWIN: - libgcc = find_libgcc(); - if (!libgcc) libgcc = "-lgcc"; - break; - default: - break; - } - strarray_add(&link_args, "-o"); strarray_add(&link_args, output_file_name); -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10119
participants (2)
- Jacek Caban
- Jacek Caban (@jacek)