As the preloader is linked with -nodefaultlibs, we need to provide builtin helpers for division on arm (that normally are provided by libgcc or compiler-rt). The bundled version is taken from the LLVM 8.0.0 release, which still was dual licensed under the MIT and University of Illinois licenses. (In the 9.0.0 release, all of LLVM was relicensed to a different license.)
Since 28fe84da45bea7de56539b50eac8ebcec54342de, the main exe image must be mappable at its desired base address, which essentially requires the preloader.
This fixes the original issue that lead to https://bugs.winehq.org/show_bug.cgi?id=51539.
Signed-off-by: Martin Storsjö martin@martin.st --- v2: Make sure the right -Ttext-segment= option is set when building the main 'wine' executable for arm. Without it, the preloader would load that executable overlapping the reserved ranges, unless the preloader was linked as PIE. --- configure.ac | 4 +- dlls/ntdll/unix/loader.c | 6 +- loader/Makefile.in | 4 +- loader/aeabi_uidivmod.S | 59 +++++++++++ loader/assembly.h | 204 +++++++++++++++++++++++++++++++++++++++ loader/preloader.c | 103 +++++++++++++++++++- loader/udivmodsi4.S | 177 +++++++++++++++++++++++++++++++++ 7 files changed, 548 insertions(+), 9 deletions(-) create mode 100644 loader/aeabi_uidivmod.S create mode 100644 loader/assembly.h create mode 100644 loader/udivmodsi4.S
diff --git a/configure.ac b/configure.ac index b5d3217f2a0..c018c89f748 100644 --- a/configure.ac +++ b/configure.ac @@ -940,7 +940,7 @@ case $host_os in WINEPRELOADER_LDFLAGS="-static -nostartfiles -nodefaultlibs -Wl,-Ttext=0x7d400000"
case $host_cpu in - *i[[3456789]]86* | x86_64 | *aarch64*) + *i[[3456789]]86* | x86_64 | *aarch64* | arm*) WINE_TRY_CFLAGS([-Wl,-Ttext-segment=0x7bc00000], [case $host_os in freebsd* | kfreebsd*-gnu) WINELOADER_LDFLAGS="$WINELOADER_LDFLAGS -Wl,-Ttext-segment=0x60000000" ;; @@ -2161,7 +2161,7 @@ esac case $host_os in linux*) case $host_cpu in - *i[[3456789]]86*|x86_64*|*aarch64*) + *i[[3456789]]86*|x86_64*|*aarch64*|arm*) test "$wine_binary" = wine || WINE_IGNORE_FILE("loader/wine-preloader") WINELOADER_PROGRAMS="$WINELOADER_PROGRAMS $wine_binary-preloader" ;; diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c index 0e580cd7556..14f26cbd520 100644 --- a/dlls/ntdll/unix/loader.c +++ b/dlls/ntdll/unix/loader.c @@ -2155,11 +2155,7 @@ static int pre_exec(void) int temp;
check_vmsplit( &temp ); -#ifdef __i386__ - return 1; /* we have a preloader on x86 */ -#else - return 0; -#endif + return 1; /* we have a preloader on x86/arm */ }
#elif defined(__linux__) && (defined(__x86_64__) || defined(__aarch64__)) diff --git a/loader/Makefile.in b/loader/Makefile.in index 7302c231915..534bf829f3f 100644 --- a/loader/Makefile.in +++ b/loader/Makefile.in @@ -1,7 +1,9 @@ SOURCES = \ + aeabi_uidivmod.S \ main.c \ preloader.c \ preloader_mac.c \ + udivmodsi4.S \ wine.de.UTF-8.man.in \ wine.desktop \ wine.fr.UTF-8.man.in \ @@ -25,7 +27,7 @@ wine64_OBJS = main.o wine64_DEPS = $(WINELOADER_DEPENDS) wine64_LDFLAGS = $(WINELOADER_LDFLAGS) $(LDEXECFLAGS) $(PTHREAD_LIBS)
-wine_preloader_OBJS = preloader.o preloader_mac.o +wine_preloader_OBJS = preloader.o preloader_mac.o aeabi_uidivmod.o udivmodsi4.o wine_preloader_DEPS = $(WINELOADER_DEPENDS) wine_preloader_LDFLAGS = $(WINEPRELOADER_LDFLAGS)
diff --git a/loader/aeabi_uidivmod.S b/loader/aeabi_uidivmod.S new file mode 100644 index 00000000000..9cef2d17e6c --- /dev/null +++ b/loader/aeabi_uidivmod.S @@ -0,0 +1,59 @@ +//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifdef __arm__ +#include "assembly.h" + +// struct { unsigned quot, unsigned rem} +// __aeabi_uidivmod(unsigned numerator, unsigned denominator) { +// unsigned rem, quot; +// quot = __udivmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_uidivmod __rt_udiv +#endif + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) +#if defined(USE_THUMB_1) + cmp r0, r1 + bcc LOCAL_LABEL(case_denom_larger) + push {r0, r1, lr} + bl SYMBOL_NAME(__aeabi_uidiv) + pop {r1, r2, r3} + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +LOCAL_LABEL(case_denom_larger): + movs r1, r0 + movs r0, #0 + JMP (lr) +#else // defined(USE_THUMB_1) + push { lr } + sub sp, sp, #4 + mov r2, sp +#if defined(__MINGW32__) + mov r3, r0 + mov r0, r1 + mov r1, r3 +#endif + bl SYMBOL_NAME(__udivmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +#endif +END_COMPILERRT_FUNCTION(__aeabi_uidivmod) + +NO_EXEC_STACK_DIRECTIVE +#endif diff --git a/loader/assembly.h b/loader/assembly.h new file mode 100644 index 00000000000..3f5e59b2544 --- /dev/null +++ b/loader/assembly.h @@ -0,0 +1,204 @@ +/* ===-- assembly.h - compiler-rt assembler support macros -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file defines macros for use in compiler-rt assembler source. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef COMPILERRT_ASSEMBLY_H +#define COMPILERRT_ASSEMBLY_H + +#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__) +#define SEPARATOR @ +#else +#define SEPARATOR ; +#endif + +#if defined(__APPLE__) +#define HIDDEN(name) .private_extern name +#define LOCAL_LABEL(name) L_##name +// tell linker it can break up file at label boundaries +#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols +#define SYMBOL_IS_FUNC(name) +#define CONST_SECTION .const + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__ELF__) + +#define HIDDEN(name) .hidden name +#define LOCAL_LABEL(name) .L_##name +#define FILE_LEVEL_DIRECTIVE +#if defined(__arm__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif +#define CONST_SECTION .section .rodata + +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + defined(__linux__) +#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits +#else +#define NO_EXEC_STACK_DIRECTIVE +#endif + +#else // !__APPLE__ && !__ELF__ + +#define HIDDEN(name) +#define LOCAL_LABEL(name) .L ## name +#define FILE_LEVEL_DIRECTIVE +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef +#define CONST_SECTION .section .rdata,"rd" + +#define NO_EXEC_STACK_DIRECTIVE + +#endif + +#if defined(__arm__) + +/* + * Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros: + * - for '-mthumb -march=armv6' compiler defines '__thumb__' + * - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__' + */ +#if defined(__thumb2__) || defined(__thumb__) +#define DEFINE_CODE_STATE .thumb SEPARATOR +#define DECLARE_FUNC_ENCODING .thumb_func SEPARATOR +#if defined(__thumb2__) +#define USE_THUMB_2 +#define IT(cond) it cond +#define ITT(cond) itt cond +#define ITE(cond) ite cond +#else +#define USE_THUMB_1 +#define IT(cond) +#define ITT(cond) +#define ITE(cond) +#endif // defined(__thumb__2) +#else // !defined(__thumb2__) && !defined(__thumb__) +#define DEFINE_CODE_STATE .arm SEPARATOR +#define DECLARE_FUNC_ENCODING +#define IT(cond) +#define ITT(cond) +#define ITE(cond) +#endif + +#if defined(USE_THUMB_1) && defined(USE_THUMB_2) +#error "USE_THUMB_1 and USE_THUMB_2 can't be defined together." +#endif + +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif +#if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) && \ + (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) +#define __ARM_FEATURE_CLZ +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#define JMPc(r, c) bx##c r +#else +#define JMP(r) mov pc, r +#define JMPc(r, c) mov##c pc, r +#endif + +// pop {pc} can't switch Thumb mode on ARMv4T +#if __ARM_ARCH >= 5 +#define POP_PC() pop {pc} +#else +#define POP_PC() \ + pop {ip}; \ + JMP(ip) +#endif + +#if defined(USE_THUMB_2) +#define WIDE(op) op.w +#else +#define WIDE(op) op +#endif +#else // !defined(__arm) +#define DECLARE_FUNC_ENCODING +#define DEFINE_CODE_STATE +#endif + +#define GLUE2(a, b) a##b +#define GLUE(a, b) GLUE2(a, b) +#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#ifdef VISIBILITY_HIDDEN +#define DECLARE_SYMBOL_VISIBILITY(name) \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR +#else +#define DECLARE_SYMBOL_VISIBILITY(name) +#endif + +#define DEFINE_COMPILERRT_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) \ + DECLARE_FUNC_ENCODING \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + .thumb_func SEPARATOR \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \ + DEFINE_CODE_STATE \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + HIDDEN(name) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + name: + +#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR \ + .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR + +#if defined(__ARM_EABI__) +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \ + DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name) +#else +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) +#endif + +#ifdef __ELF__ +#define END_COMPILERRT_FUNCTION(name) \ + .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) +#else +#define END_COMPILERRT_FUNCTION(name) +#endif + +#endif /* COMPILERRT_ASSEMBLY_H */ diff --git a/loader/preloader.c b/loader/preloader.c index bb16ecd96a4..4b53791e782 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -111,7 +111,7 @@
static struct wine_preload_info preload_info[] = { -#ifdef __i386__ +#if defined(__i386__) || defined(__arm__) { (void *)0x00000000, 0x00010000 }, /* low 64k */ { (void *)0x00010000, 0x00100000 }, /* DOS area */ { (void *)0x00110000, 0x67ef0000 }, /* low memory area */ @@ -536,6 +536,104 @@ SYSCALL_NOERR( wld_geteuid, 175 /* SYS_geteuid */ ); gid_t wld_getegid(void); SYSCALL_NOERR( wld_getegid, 177 /* SYS_getegid */ );
+#elif defined(__arm__) + +void *thread_data[256]; + +/* + * The _start function is the entry and exit point of this program + * + * It calls wld_start, passing a pointer to the args it receives + * then jumps to the address wld_start returns. + */ +void _start(void); +extern char _end[]; +__ASM_GLOBAL_FUNC(_start, + "mov r0, sp\n\t" + "sub sp, sp, #144\n\t" /* allocate some space for extra aux values */ + "str r0, [sp]\n\t" /* orig stack pointer */ + "ldr r0, =thread_data\n\t" + "movw r7, #0x0005\n\t" /* __ARM_NR_set_tls */ + "movt r7, #0xf\n\t" /* __ARM_NR_set_tls */ + "svc #0\n\t" + "mov r0, sp\n\t" /* ptr to orig stack pointer */ + "bl wld_start\n\t" + "ldr r1, [sp]\n\t" /* new stack pointer */ + "mov sp, r1\n\t" + "mov lr, r0\n\t" + "mov r0, #0\n\t" + "mov r1, #0\n\t" + "mov r2, #0\n\t" + "mov r3, #0\n\t" + "mov r12, #0\n\t" + "bx lr\n\t" + ".ltorg\n\t") + +#define SYSCALL_FUNC( name, nr ) \ + __ASM_GLOBAL_FUNC( name, \ + "push {r4-r5,r7,lr}\n\t" \ + "ldr r4, [sp, #16]\n\t" \ + "ldr r5, [sp, #20]\n\t" \ + "mov r7, #" #nr "\n\t" \ + "svc #0\n\t" \ + "cmn r0, #4096\n\t" \ + "it hi\n\t" \ + "movhi r0, #-1\n\t" \ + "pop {r4-r5,r7,pc}\n\t" ) + +#define SYSCALL_NOERR( name, nr ) \ + __ASM_GLOBAL_FUNC( name, \ + "push {r7,lr}\n\t" \ + "mov r7, #" #nr "\n\t" \ + "svc #0\n\t" \ + "pop {r7,pc}\n\t" ) + +void wld_exit( int code ) __attribute__((noreturn)); +SYSCALL_NOERR( wld_exit, 1 /* SYS_exit */ ); + +ssize_t wld_read( int fd, void *buffer, size_t len ); +SYSCALL_FUNC( wld_read, 3 /* SYS_read */ ); + +ssize_t wld_write( int fd, const void *buffer, size_t len ); +SYSCALL_FUNC( wld_write, 4 /* SYS_write */ ); + +int wld_openat( int dirfd, const char *name, int flags ); +SYSCALL_FUNC( wld_openat, 322 /* SYS_openat */ ); + +int wld_open( const char *name, int flags ) +{ + return wld_openat(-100 /* AT_FDCWD */, name, flags); +} + +int wld_close( int fd ); +SYSCALL_FUNC( wld_close, 6 /* SYS_close */ ); + +void *wld_mmap2( void *start, size_t len, int prot, int flags, int fd, int offset ); +SYSCALL_FUNC( wld_mmap2, 192 /* SYS_mmap2 */ ); + +void *wld_mmap( void *start, size_t len, int prot, int flags, int fd, off_t offset ) +{ + return wld_mmap2(start, len, prot, flags, fd, offset >> 12); +} + +int wld_mprotect( const void *addr, size_t len, int prot ); +SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ ); + +int wld_prctl( int code, long arg ); +SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ ); + +uid_t wld_getuid(void); +SYSCALL_NOERR( wld_getuid, 24 /* SYS_getuid */ ); + +gid_t wld_getgid(void); +SYSCALL_NOERR( wld_getgid, 47 /* SYS_getgid */ ); + +uid_t wld_geteuid(void); +SYSCALL_NOERR( wld_geteuid, 49 /* SYS_geteuid */ ); + +gid_t wld_getegid(void); +SYSCALL_NOERR( wld_getegid, 50 /* SYS_getegid */ ); + #else #error preloader not implemented for this CPU #endif @@ -809,6 +907,9 @@ static void map_so_lib( const char *name, struct wld_link_map *l) #elif defined(__aarch64__) if( header->e_machine != EM_AARCH64 ) fatal_error("%s: not an aarch64 ELF binary... don't know how to load it\n", name ); +#elif defined(__arm__) + if( header->e_machine != EM_ARM ) + fatal_error("%s: not an arm ELF binary... don't know how to load it\n", name ); #endif
if (header->e_phnum > sizeof(loadcmds)/sizeof(loadcmds[0])) diff --git a/loader/udivmodsi4.S b/loader/udivmodsi4.S new file mode 100644 index 00000000000..2b89a52e3cb --- /dev/null +++ b/loader/udivmodsi4.S @@ -0,0 +1,177 @@ +/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivmodsi4 (32-bit unsigned integer divide and + * modulus) function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#ifdef __arm__ +#include "assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +@ unsigned int __udivmodsi4(unsigned int divident, unsigned int divisor, +@ unsigned int *remainder) +@ Calculate the quotient and remainder of the (unsigned) division. The return +@ value is the quotient, the remainder is placed in the variable. + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + mov r3, r0 + udiv r0, r3, r1 + mls r1, r0, r1, r3 + str r1, [r2] + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + beq LOCAL_LABEL(divby1) + cmp r0, r1 + bcc LOCAL_LABEL(quotient0) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 12 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ + sub r3, r3, ip +# if defined(USE_THUMB_2) + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else +# if defined(USE_THUMB_2) +# error THUMB mode requires CLZ or UDIV +# endif + str r4, [sp, #-8]! + + mov r4, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r4, #16 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(16 * 12) + + lsr r3, r4, #8 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(8 * 12) + + lsr r3, r4, #4 + cmp r3, r1 + movhs r4, r3 + subhs ip, #(4 * 12) + + lsr r3, r4, #2 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(2 * 12) + + /* Last block, no need to update r3 or r4. */ + cmp r1, r4, lsr #1 + subls ip, ip, #(1 * 12) + + ldr r4, [sp], #8 /* restore r4, we are done with it. */ + mov r3, #0 + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + str r0, [r2] + mov r0, r3 + JMP(lr) + +LOCAL_LABEL(quotient0): + str r0, [r2] + mov r0, #0 + JMP(lr) + +LOCAL_LABEL(divby1): + mov r3, #0 + str r3, [r2] + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 + JMP(lr) + +END_COMPILERRT_FUNCTION(__udivmodsi4) + +NO_EXEC_STACK_DIRECTIVE +#endif