From: Jinoh Kang jinoh.kang.kr@gmail.com
Today, the preloader removes the vDSO entries (AT_SYSINFO*) from the auxiliary vector when it conflicts with one of the predefined reserved ranges.
vDSO is a shared object provided by the kernel. Among other things, it provides a mechanism to issue certain system calls without the overhead of switching to the kernel mode.
Without vDSO, libc still works; however, it is expected that some system call functions (e.g. gettimeofday, clock_gettime) will show degraded performance.
Fix this by relocating vDSO to another address (if supported by the kernel) instead of erasing it from auxv entirely.
Since this is a potentially risky change, this behaviour is hidden behind the "WINEPRELOADREMAPVDSO" environment variable. To activate the behaviour, the user needs to set "WINEPRELOADREMAPVDSO=on-conflict". After sufficient testing has been done via staging process, the new behaviour could be the default and the environment variables removed.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52313 Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 580 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 577 insertions(+), 3 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 7d17136d3bc..52036dee554 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -72,6 +72,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <signal.h> #include <sys/mman.h> #ifdef HAVE_SYS_SYSCALL_H # include <sys/syscall.h> @@ -86,6 +87,9 @@ #ifdef HAVE_SYS_LINK_H # include <sys/link.h> #endif +#ifdef HAVE_SYS_UCONTEXT_H +# include <sys/ucontext.h> +#endif
#include "wine/asm.h" #include "main.h" @@ -102,6 +106,11 @@ #ifndef MAP_NORESERVE #define MAP_NORESERVE 0 #endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif + +#define REMAP_TEST_SIG SIGIO /* Any signal GDB doesn't stop on */
static struct wine_preload_info preload_info[] = { @@ -165,6 +174,18 @@ struct wld_auxv } a_un; };
+typedef unsigned long wld_sigset_t[8 / sizeof(unsigned long)]; + +struct wld_sigaction { + /* Prefix all fields since they may collide with macros from libc headers */ + void (*wld_sa_sigaction)(int, siginfo_t *, void *); + unsigned long wld_sa_flags; + void (*wld_sa_restorer)(void); + wld_sigset_t wld_sa_mask; +}; + +#define WLD_SA_SIGINFO 4 + /* Aggregates information about initial program stack and variables * (e.g. argv and envp) that reside in it. */ @@ -193,10 +214,55 @@ struct linebuffer int truncated; };
+/* + * Flags that specify the kind of each VMA entry read from /proc/self/maps. + * + * On Linux, Reading /proc/self/maps is the only reliable way to identify the + * exact range of vDSO/vvar mapping. The reason is twofold: + * + * 1. vDSO usually hard-codes vvar's offset relative to vDSO. Therefore, + * remapping vDSO requires vvar to be also remapped as well. However, vvar's + * size and its location relative to vDSO is *not* guaranteed by ABI, and has + * changed all the time. + * + * - x86: [vvar] orginally resided at a fixed address 0xffffffffff5ff000 + * (64-bit) [1], but was later changed so that it precedes [vdso] [2]. + * There, sym_vvar_start is a negative value [3]. text_start is the base + * address of vDSO, and addr becomes the address of vvar. + * + * - AArch32: [vvar] is a single page and precedes [vdso] [4]. + * + * - AArch64: [vvar] is two pages long and precedes [vdso] [5]. + * Before v5.9, [vvar] was a single page [6]. + * + * 2. It's very difficult to deduce vDSO and vvar's size and offset relative to + * each other. Since vvar's symbol does not exist in vDSO's symtab, + * determining the layout would require parsing vDSO's code. + * + * Also note that CRIU (Checkpoint Restore In Userspace) has maps parsing code + * just for relocating vDSO [7]. + * + * [1] https://lwn.net/Articles/615809/ + * [2] https://elixir.bootlin.com/linux/v5.16.3/source/arch/x86/entry/vdso/vma.c#L2... + * [3] https://elixir.bootlin.com/linux/v5.16.3/source/arch/x86/include/asm/vdso.h#... + * [4] https://elixir.bootlin.com/linux/v5.16.3/source/arch/arm/kernel/vdso.c#L236 + * [5] https://elixir.bootlin.com/linux/v5.16.3/source/arch/arm64/kernel/vdso.c#L21... + * [6] https://elixir.bootlin.com/linux/v5.8/source/arch/arm64/kernel/vdso.c#L161 + * [7] https://github.com/checkpoint-restore/criu/blob/a315774e11b4da1eb36446ae996e... + */ +enum vma_type_flags +{ + VMA_NORMAL = 0x01, + VMA_VDSO = 0x02, + VMA_VVAR = 0x04, +}; + struct vma_area { unsigned long start; unsigned long end; + unsigned char type_flags; /* enum vma_type_flags */ + unsigned char moved; /* has been mremap()'d? */ };
struct vma_area_list @@ -209,6 +275,57 @@ struct vma_area_list #define FOREACH_VMA(list, item) \ for ((item) = (list)->base; (item) != (list)->list_end; (item)++)
+/* + * Allow the user to configure the remapping behaviour if it causes trouble. + * The "force" (REMAP_POLICY_FORCE) value can be used to test the remapping + * code path unconditionally. + */ +enum remap_policy +{ + REMAP_POLICY_ON_CONFLICT = 0, + REMAP_POLICY_FORCE = 1, + REMAP_POLICY_SKIP = 2, + LAST_REMAP_POLICY, + + REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP, +}; + +/* + * Used in a signal handler that tests if mremap() on vDSO works on the current + * kernel. + */ +struct remap_test_block { + /* The old address range of vDSO or sigpage. Used to test if pages are remapped properly. */ + unsigned long old_mapping_start; + unsigned long old_mapping_size; + + struct vma_area_list *vma_list; + + /* + * Difference between the base address of the new mapping and the old mapping. + * + * Set to zero if the handler reverted mappings to old state before returning + * in order to safely return when it detects failed remapping. + */ + unsigned long delta; + + /* + * Whether remapping was successfully recognised by the kernel. + * + * If the signal handler is never called (due to e.g. being blocked), it is counted + * as being unsuccessful. + */ + unsigned char is_successful; + + /* + * Whether remapping could not be recognised by the kernel. + * + * If both is_successful and is_failed are set, is_failed takes precedence. + * The flags are intentionally made redundant to detect multiple successive + * invocation of the signal handler due to external signal delivery. */ + unsigned char is_failed; +} remap_test; + /* * The __bb_init_func is an empty function only called when file is * compiled with gcc flags "-fprofile-arcs -ftest-coverage". This @@ -244,6 +361,15 @@ struct unsigned int garbage : 25; } thread_ldt = { -1, (unsigned long)thread_data, 0xfffff, 1, 0, 0, 1, 0, 1, 0 };
+typedef unsigned long wld_old_sigset_t; + +struct wld_old_sigaction { + /* Prefix all fields since they may collide with macros from libc headers */ + void (*wld_sa_sigaction)(int, siginfo_t *, void *); + wld_old_sigset_t wld_sa_mask; + unsigned long wld_sa_flags; + void (*wld_sa_restorer)(void); +};
/* * The _start function is the entry and exit point of this program @@ -381,6 +507,16 @@ static inline int wld_munmap( void *addr, size_t len ) return SYSCALL_RET(ret); }
+static inline void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ) +{ + int ret; + __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" + : "=a" (ret) : "0" (163 /* SYS_mremap */), "r" (old_addr), "c" (old_len), + "d" (new_size), "S" (flags), "D" (new_addr) + : "memory" ); + return (void *)SYSCALL_RET(ret); +} + static inline int wld_prctl( int code, long arg ) { int ret; @@ -389,6 +525,64 @@ static inline int wld_prctl( int code, long arg ) return SYSCALL_RET(ret); }
+static void copy_old_sigset( void *dest, const void *src ) +{ + /* Avoid aliasing */ + size_t i; + for (i = 0; i < sizeof(wld_old_sigset_t); i++) + *((unsigned char *)dest + i) = *((const unsigned char *)src + i); +} + +static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act ) +{ + int ret; + __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" + : "=a" (ret) : "0" (174 /* SYS_rt_sigaction */), "r" (signum), "c" (act), "d" (old_act), "S" (sizeof(act->wld_sa_mask)) + : "memory" ); + if (ret == -38 /* ENOSYS */) { + struct wld_old_sigaction act_buf, old_act_buf, *act_real, *old_act_real; + + if (act) { + act_real = &act_buf; + act_buf.wld_sa_sigaction = act->wld_sa_sigaction; + copy_old_sigset(&act_buf.wld_sa_mask, &act->wld_sa_mask); + act_buf.wld_sa_flags = act->wld_sa_flags; + act_buf.wld_sa_restorer = act->wld_sa_restorer; + } + + if (old_act) old_act_real = &old_act_buf; + + __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" + : "=a" (ret) : "0" (67 /* SYS_sigaction */), "r" (signum), "c" (act_real), "d" (old_act_real) + : "memory" ); + + if (old_act && ret >= 0) { + old_act->wld_sa_sigaction = old_act_buf.wld_sa_sigaction; + old_act->wld_sa_flags = old_act_buf.wld_sa_flags; + old_act->wld_sa_restorer = old_act_buf.wld_sa_restorer; + copy_old_sigset(&old_act->wld_sa_mask, &old_act_buf.wld_sa_mask); + } + } + return SYSCALL_RET(ret); +} + +static inline int wld_kill( pid_t pid, int sig ) +{ + int ret; + __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" + : "=a" (ret) : "0" (37 /* SYS_kill */), "r" (pid), "c" (sig) + : "memory" /* clobber: signal handler side effects on raise() */ ); + return SYSCALL_RET(ret); +} + +static inline pid_t wld_getpid( void ) +{ + int ret; + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (20 /* SYS_getpid */) ); + return ret; +} + #elif defined(__x86_64__)
void *thread_data[256]; @@ -467,9 +661,15 @@ SYSCALL_FUNC( wld_mprotect, 10 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 11 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 25 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 157 /* SYS_prctl */ );
+pid_t wld_getpid(void); +SYSCALL_NOERR( wld_getpid, 39 /* SYS_getpid */ ); + uid_t wld_getuid(void); SYSCALL_NOERR( wld_getuid, 102 /* SYS_getuid */ );
@@ -577,9 +777,26 @@ SYSCALL_FUNC( wld_mprotect, 226 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 215 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 216 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 167 /* SYS_prctl */ );
+int wld_rt_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act, size_t sigsetsize ); +SYSCALL_FUNC( wld_rt_sigaction, 134 /* SYS_rt_sigaction */ ); + +static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act ) +{ + return wld_rt_sigaction( signum, act, old_act, sizeof(act->wld_sa_mask) ); +} + +int wld_kill( pid_t pid, int sig ); +SYSCALL_FUNC( wld_kill, 129 /* SYS_kill */ ); + +pid_t wld_getpid(void); +SYSCALL_NOERR( wld_getpid, 172 /* SYS_getpid */ ); + uid_t wld_getuid(void); SYSCALL_NOERR( wld_getuid, 174 /* SYS_getuid */ );
@@ -679,9 +896,26 @@ SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 91 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 163 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ );
+int wld_rt_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act, size_t sigsetsize ); +SYSCALL_FUNC( wld_rt_sigaction, 174 /* SYS_rt_sigaction */ ); + +static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act ) +{ + return wld_rt_sigaction( signum, act, old_act, sizeof(act->wld_sa_mask) ); +} + +int wld_kill( pid_t pid, int sig ); +SYSCALL_FUNC( wld_kill, 37 /* SYS_kill */ ); + +pid_t wld_getpid(void); +SYSCALL_NOERR( wld_getpid, 20 /* SYS_getpid */ ); + uid_t wld_getuid(void); SYSCALL_NOERR( wld_getuid, 24 /* SYS_getuid */ );
@@ -1661,6 +1895,7 @@ static char *linebuffer_getline( struct linebuffer *lbuf ) static int parse_maps_line( struct vma_area *entry, const char *line ) { struct vma_area item = { 0 }; + unsigned long dev_maj, dev_min; char *ptr = (char *)line; int overflow;
@@ -1691,11 +1926,11 @@ static int parse_maps_line( struct vma_area *entry, const char *line ) if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
- parse_ul( ptr, &ptr, 16, NULL ); + dev_maj = parse_ul( ptr, &ptr, 16, NULL ); if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
- parse_ul( ptr, &ptr, 16, NULL ); + dev_min = parse_ul( ptr, &ptr, 16, NULL ); if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
@@ -1703,6 +1938,17 @@ static int parse_maps_line( struct vma_area *entry, const char *line ) if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
+ while (*ptr == ' ') + ptr++; + + if (dev_maj == 0 && dev_min == 0) + { + if (wld_strcmp(ptr, "[vdso]") == 0) + item.type_flags |= VMA_VDSO; + else if (wld_strcmp(ptr, "[vvar]") == 0) + item.type_flags |= VMA_VVAR; + } + *entry = item; return 0; } @@ -1802,6 +2048,76 @@ static void insert_vma_entry( struct vma_area_list *list, const struct vma_area return; }
+/* + * find_vma_envelope_range + * + * Compute the smallest range that contains all VMAs with any of the given + * type flags. + */ +static int find_vma_envelope_range( const struct vma_area_list *list, int type_mask, unsigned long *startp, unsigned long *sizep ) +{ + const struct vma_area *item; + unsigned long start = ULONG_MAX; + unsigned long end = 0; + + FOREACH_VMA(list, item) + { + if (item->type_flags & type_mask) + { + if (start > item->start) start = item->start; + if (end < item->end) end = item->end; + } + } + + if (start >= end) return -1; + + *startp = start; + *sizep = end - start; + return 0; +} + +/* + * remap_multiple_vmas + * + * Relocate all VMAs with the given type flags. + * This function can also be used to reverse the effects of previous + * remap_multiple_vmas(). + */ +static int remap_multiple_vmas( struct vma_area_list *list, unsigned long delta, int type_mask, unsigned char revert ) +{ + struct vma_area *item; + void *old_addr, *desired_addr, *mapped_addr; + size_t size; + + FOREACH_VMA(list, item) + { + if ((item->type_flags & type_mask) && item->moved == revert) + { + if (revert) + { + old_addr = (void *)(item->start + delta); + desired_addr = (void *)item->start; + } + else + { + old_addr = (void *)item->start; + desired_addr = (void *)(item->start + delta); + } + size = item->end - item->start; + mapped_addr = wld_mremap( old_addr, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, desired_addr ); + if (mapped_addr == (void *)-1) return -1; + if (mapped_addr != desired_addr) + { + if (mapped_addr == old_addr) return -1; /* kernel deoesn't support MREMAP_FIXED */ + fatal_error( "mremap() returned different address\n" ); + } + item->moved = !revert; + } + } + + return 0; +} + /* * scan_vma * @@ -1887,6 +2203,262 @@ static void alloc_scan_vma( struct vma_area_list *listp ) } }
+/* + * stackargs_get_remap_policy + * + * Parse the remap policy value from the given environment variable. + */ +static enum remap_policy stackargs_get_remap_policy( const struct stackarg_info *info, const char *name, + enum remap_policy default_policy ) +{ + char *valstr = stackargs_getenv( info, name ), *endptr; + unsigned long valnum; + + if (valstr) + { + if (wld_strcmp(valstr, "auto") == 0 || wld_strcmp(valstr, "on-conflict") == 0) + return REMAP_POLICY_ON_CONFLICT; + if (wld_strcmp(valstr, "always") == 0 || wld_strcmp(valstr, "force") == 0) + return REMAP_POLICY_FORCE; + if (wld_strcmp(valstr, "never") == 0 || wld_strcmp(valstr, "skip") == 0) + return REMAP_POLICY_SKIP; + valnum = parse_ul( valstr, &endptr, 10, NULL ); + if (!*endptr && valnum < LAST_REMAP_POLICY) return valnum; + } + + return default_policy; +} + +/* + * check_remap_policy + * + * Check remap policy against the given range and determine the action to take. + * + * -1: fail + * 0: do nothing + * 1: proceed with remapping + */ +static int check_remap_policy( struct preloader_state *state, + const char *policy_envname, enum remap_policy default_policy, + unsigned long start, unsigned long size ) +{ + switch (stackargs_get_remap_policy( &state->s, policy_envname, default_policy )) + { + case REMAP_POLICY_SKIP: + return -1; + case REMAP_POLICY_ON_CONFLICT: + if (find_preload_reserved_area( (void *)start, size ) < 0) + return 0; + /* fallthrough */ + case REMAP_POLICY_FORCE: + default: + return 1; + } +} + +#ifndef __x86_64__ +/* + * remap_test_in_old_address_range + * + * Determine whether the address falls in the old mapping address range + * (i.e. before mremap). + */ +static int remap_test_in_old_address_range( unsigned long address ) +{ + return address - remap_test.old_mapping_start < remap_test.old_mapping_size; +} + +/* + * remap_test_signal_handler + * + * A signal handler that detects whether the kernel has acknowledged the new + * addresss for the remapped vDSO. + */ +static void remap_test_signal_handler( int signum, siginfo_t *sinfo, void *context ) +{ + (void)signum; + (void)sinfo; + (void)context; + + if (remap_test_in_old_address_range((unsigned long)__builtin_return_address(0))) goto fail; + +#ifdef __i386__ + /* test for SYSENTER/SYSEXIT return address (int80_landing_pad) */ + if (remap_test_in_old_address_range(((ucontext_t *)context)->uc_mcontext.gregs[REG_EIP])) goto fail; +#endif + + remap_test.is_successful = 1; + return; + +fail: + /* Kernel too old to support remapping. Restore vDSO/sigpage to return safely. */ + if (remap_test.delta) { + if (remap_multiple_vmas( remap_test.vma_list, remap_test.delta, -1, 1 ) < 0) + fatal_error( "Cannot restore remapped VMAs\n" ); + remap_test.delta = 0; + } + + /* Signal handler might be called several times externally, + * so overwrite with the latest status just to be safe. */ + remap_test.is_failed = 1; +} +#endif + +/* + * test_remap_successful + * + * Test if the kernel has acknowledged the remapped vDSO. + * + * Remapping vDSO requires explicit kernel support for most architectures, but + * the support is missing in old Linux kernels (pre-4.8). Among other things, + * vDSO contains the default signal restorer (sigreturn trampoline) and on i386 + * the fast syscall gate (which uses SYSENTER on Intel CPUs). The kernel keeps + * track of the addresses of both of these things per process, and those + * addresses need to be updated accordingly if the vDSO address changes. + * Without proper support, mremap() on vDSO still succeeds, but the kernel still + * uses old addresses for the vDSO components, resulting in crashes or other + * unpredictable behaviour if any of those addresses are used. + * + * We attempt to detect this condition by installing a signal handler and + * sending a signal to ourselves. The signal handler will test if the restorer + * address falls in the old address range; if this is the case, we remap the + * vDSO to its old address and report failure (i.e. no support from kernel). On + * i386, we additionally check for the syscall gate. If the addresses do not + * overlap with the old address range, the kernel is new enough to support vDSO + * remapping and we can proceed as normal. + */ +static int test_remap_successful( struct vma_area_list *vma_list, struct preloader_state *state, + unsigned long old_mapping_start, unsigned long old_mapping_size, + unsigned long delta ) +{ +#ifdef __x86_64__ + (void)vma_list; + (void)state; + (void)old_mapping_start; + (void)old_mapping_size; + (void)delta; + + /* x86-64 doesn't use SYSENTER for syscalls, and requires sa_restorer for + * signal handlers. We can safely relocate vDSO without kernel support + * (vdso_mremap). */ + return 0; +#else + struct wld_sigaction sigact; + pid_t pid; + int result = -1; + unsigned long syscall_addr = 0; + + pid = wld_getpid(); + if (pid < 0) fatal_error( "failed to get PID\n" ); + +#ifdef __i386__ + syscall_addr = get_auxiliary( state->s.auxv, AT_SYSINFO, 0 ); + if (syscall_addr - old_mapping_start < old_mapping_size) syscall_addr += delta; +#endif + + remap_test.old_mapping_start = old_mapping_start; + remap_test.old_mapping_size = old_mapping_size; + remap_test.vma_list = vma_list; + remap_test.delta = delta; + remap_test.is_successful = 0; + remap_test.is_failed = 0; + + wld_memset( &sigact, 0, sizeof(sigact) ); + sigact.wld_sa_sigaction = remap_test_signal_handler; + sigact.wld_sa_flags = WLD_SA_SIGINFO; + /* We deliberately skip sa_restorer, since we're trying to get the address + * of the kernel's built-in restorer function. */ + + if (wld_sigaction( REMAP_TEST_SIG, &sigact, &sigact ) < 0) fatal_error( "cannot register test signal handler\n" ); + + /* Unsafe region below - may race with signal handler */ +#ifdef __i386__ + if (syscall_addr) { + /* Also test __kernel_vsyscall return as well */ + __asm__ __volatile__( "call *%1" + : "=a" (result) : "r" (syscall_addr), "0" (37 /* SYS_kill */), "b" (pid), "c" (REMAP_TEST_SIG) ); + result = SYSCALL_RET(result); + } +#else + syscall_addr = 0; +#endif + if (!syscall_addr) result = wld_kill( pid, REMAP_TEST_SIG ); + /* Unsafe region above - may race with signal handler */ + + if (wld_sigaction( REMAP_TEST_SIG, &sigact, &sigact ) < 0) fatal_error( "cannot unregister test signal handler\n" ); + if (result == -1) fatal_error( "cannot raise test signal\n" ); + + /* Now that the signal handler can no longer be called, + * we can safely access the result data. */ + if (remap_test.is_failed || !remap_test.is_successful) { + if (remap_test.delta && remap_multiple_vmas( remap_test.vma_list, remap_test.delta, -1, 1 ) < 0) + fatal_error( "Cannot restore remapped VMAs\n" ); + return -1; + } + + return 0; +#endif +} + +/* + * remap_vdso + * + * Perform vDSO remapping if it conflicts with one of the reserved address ranges. + */ +static int remap_vdso( struct vma_area_list *vma_list, struct preloader_state *state ) +{ + int result; + unsigned long vdso_start, vdso_size, delta; + void *new_vdso; + struct wld_auxv *auxv; + + if (find_vma_envelope_range( vma_list, VMA_VDSO | VMA_VVAR, &vdso_start, &vdso_size ) < 0) return 0; + + result = check_remap_policy( state, "WINEPRELOADREMAPVDSO", + REMAP_POLICY_DEFAULT_VDSO, + vdso_start, vdso_size ); + if (result <= 0) return result; + + new_vdso = wld_mmap( NULL, vdso_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0 ); + if (new_vdso == (void *)-1) return -1; + + delta = (unsigned long)new_vdso - vdso_start; + /* It's easier to undo vvar remapping, so we remap it first. */ + if (remap_multiple_vmas( vma_list, delta, VMA_VVAR, 0 ) < 0 || + remap_multiple_vmas( vma_list, delta, VMA_VDSO, 0 ) < 0) goto remap_restore; + + /* NOTE: AArch32 may have restorer in vDSO if we're running on an old ARM64 kernel. */ + if (test_remap_successful( vma_list, state, vdso_start, vdso_size, delta ) < 0) + { + /* mapping restore done by test_remap_successful */ + return -1; + } + + for (auxv = state->s.auxv; auxv->a_type != AT_NULL; auxv++) + { + switch (auxv->a_type) + { + case AT_SYSINFO: + case AT_SYSINFO_EHDR: + if ((unsigned long)auxv->a_un.a_val - vdso_start < vdso_size) + auxv->a_un.a_val += delta; + break; + } + } + + /* Refresh VMA list */ + free_vma_list( vma_list ); + alloc_scan_vma( vma_list ); + return 1; + +remap_restore: + if (remap_multiple_vmas( vma_list, delta, -1, 1 ) < 0) + fatal_error( "Cannot restore remapped VMAs\n" ); + + return -1; +} + /* * map_reserve_preload_ranges * @@ -1974,6 +2546,8 @@ void* wld_start( void **stack ) alloc_scan_vma( &vma_list ); map_reserve_preload_ranges( &vma_list, &state.s );
+ if (remap_vdso( &vma_list, &state ) > 0) map_reserve_preload_ranges( &vma_list, &state.s ); + /* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */ if (find_preload_reserved_area( (char *)0x80000000 - page_size, page_size ) >= 0) @@ -2003,7 +2577,7 @@ void* wld_start( void **stack ) #undef SET_NEW_AV
i = 0; - /* delete sysinfo values if addresses conflict */ + /* delete sysinfo values if addresses conflict and remap failed */ if (is_in_preload_range( state.s.auxv, AT_SYSINFO ) || is_in_preload_range( state.s.auxv, AT_SYSINFO_EHDR )) { delete_av[i++].a_type = AT_SYSINFO;