Although int = long effectively in i386, this makes it clear that a machine word-width value is being returned.
Also, future patches adding syscalls returning pointers can use the same type (long) for return value consistently.
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com> ---
Notes: v1 -> v2: don't change SYSCALL_RET().
loader/preloader.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 585be50624f..937843eb9c5 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -247,7 +247,7 @@ static inline __attribute__((noreturn)) void wld_exit( int code )
static inline int wld_open( const char *name, int flags ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (5 /* SYS_open */), "r" (name), "c" (flags) ); return SYSCALL_RET(ret); @@ -255,7 +255,7 @@ static inline int wld_open( const char *name, int flags )
static inline int wld_close( int fd ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (6 /* SYS_close */), "r" (fd) ); return SYSCALL_RET(ret); @@ -263,7 +263,7 @@ static inline int wld_close( int fd )
static inline ssize_t wld_read( int fd, void *buffer, size_t len ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (3 /* SYS_read */), "r" (fd), "c" (buffer), "d" (len) @@ -273,7 +273,7 @@ static inline ssize_t wld_read( int fd, void *buffer, size_t len )
static inline ssize_t wld_write( int fd, const void *buffer, size_t len ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (4 /* SYS_write */), "r" (fd), "c" (buffer), "d" (len) ); return SYSCALL_RET(ret); @@ -281,7 +281,7 @@ static inline ssize_t wld_write( int fd, const void *buffer, size_t len )
static inline int wld_mprotect( const void *addr, size_t len, int prot ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (125 /* SYS_mprotect */), "r" (addr), "c" (len), "d" (prot) ); return SYSCALL_RET(ret); @@ -328,7 +328,7 @@ __ASM_GLOBAL_FUNC(wld_mmap,
static inline int wld_prctl( int code, long arg ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (172 /* SYS_prctl */), "r" (code), "c" (arg) ); return SYSCALL_RET(ret);
Arbitrarily manipulating the stack pointer in inline assembly interferes with stack unwinding and debugging experience. Note that it's impossible to reliably specify unwinding information in inline assembly, other than adjusting CFA offset.
The workaround appears to be due to "buggy" register allocation that manifests in GCC <= 4.x when emitting position-independent code. This is not an issue, since the preloader isn't a position-independent executable or a shared library.
Fix this by getting rid of extra spilling and value transfer of the EBX register, and telling the compiler to allocate EBX directly.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 937843eb9c5..9675dc3f8eb 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -241,32 +241,32 @@ __ASM_GLOBAL_FUNC(_start, static inline __attribute__((noreturn)) void wld_exit( int code ) { for (;;) /* avoid warning */ - __asm__ __volatile__( "pushl %%ebx; movl %1,%%ebx; int $0x80; popl %%ebx" - : : "a" (1 /* SYS_exit */), "r" (code) ); + __asm__ __volatile__( "int $0x80" + : : "a" (1 /* SYS_exit */), "b" (code) ); }
static inline int wld_open( const char *name, int flags ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (5 /* SYS_open */), "r" (name), "c" (flags) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (5 /* SYS_open */), "b" (name), "c" (flags) ); return SYSCALL_RET(ret); }
static inline int wld_close( int fd ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (6 /* SYS_close */), "r" (fd) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (6 /* SYS_close */), "b" (fd) ); return SYSCALL_RET(ret); }
static inline ssize_t wld_read( int fd, void *buffer, size_t len ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" + __asm__ __volatile__( "int $0x80" : "=a" (ret) - : "0" (3 /* SYS_read */), "r" (fd), "c" (buffer), "d" (len) + : "0" (3 /* SYS_read */), "b" (fd), "c" (buffer), "d" (len) : "memory" ); return SYSCALL_RET(ret); } @@ -274,16 +274,16 @@ static inline ssize_t wld_read( int fd, void *buffer, size_t len ) static inline ssize_t wld_write( int fd, const void *buffer, size_t len ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (4 /* SYS_write */), "r" (fd), "c" (buffer), "d" (len) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (4 /* SYS_write */), "b" (fd), "c" (buffer), "d" (len) ); return SYSCALL_RET(ret); }
static inline int wld_mprotect( const void *addr, size_t len, int prot ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (125 /* SYS_mprotect */), "r" (addr), "c" (len), "d" (prot) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (125 /* SYS_mprotect */), "b" (addr), "c" (len), "d" (prot) ); return SYSCALL_RET(ret); }
@@ -329,8 +329,8 @@ __ASM_GLOBAL_FUNC(wld_mmap, static inline int wld_prctl( int code, long arg ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (172 /* SYS_prctl */), "r" (code), "c" (arg) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (172 /* SYS_prctl */), "b" (code), "c" (arg) ); return SYSCALL_RET(ret); }
Collect scattered variables holding stack addresses (e.g. pargc, argv, envp, auxv) in one place.
This facilitates modifying stack values (e.g. removing argv[0], switching stacks due to address conflict with reserved regions) without leaving pointer variables stale.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com ---
Notes: v1 -> v2: - Zero argc slot before writing to it - s/stackargs_eat_args/stackargs_shift_args/ - s/shift_stackargs/stackargs_switch_stack/ - s/offset/delta/ - slightly change auxv append logic to match the original closer
loader/preloader.c | 243 ++++++++++++++++++++++++++++++++------------- 1 file changed, 173 insertions(+), 70 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 9675dc3f8eb..446e2f0e239 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -164,6 +164,21 @@ struct wld_auxv } a_un; };
+struct stackarg_info +{ + void *stack; + int argc; + char **argv; + char **envp; + struct wld_auxv *auxv; + struct wld_auxv *auxv_end; +}; + +struct preloader_state +{ + struct stackarg_info s; +}; + /* * The __bb_init_func is an empty function only called when file is * compiled with gcc flags "-fprofile-arcs -ftest-coverage". This @@ -674,6 +689,32 @@ static inline void *wld_memset( void *dest, int val, size_t len ) return dest; }
+static size_t wld_strlen( const char *str ) +{ + const char *ptr = str; + while (*ptr) ptr++; + return ptr - str; +} + +static inline void *wld_memmove( void *dest, const void *src, size_t len ) +{ + unsigned char *destp = dest; + const unsigned char *srcp = src; + + if ((unsigned long)dest - (unsigned long)src < len) + { + destp += len; + srcp += len; + while (len--) *--destp = *--srcp; + } + else + { + while (len--) *destp++ = *srcp++; + } + + return dest; +} + /* * wld_printf - just the basics * @@ -794,72 +835,145 @@ static void dump_auxiliary( struct wld_auxv *av ) } #endif
+static void parse_stackargs( struct stackarg_info *outinfo, void *stack ) +{ + int argc; + char **argv, **envp, **env_end; + struct wld_auxv *auxv, *auxv_end; + + argc = *(int *)stack; + argv = (char **)stack + 1; + envp = argv + (unsigned int)argc + 1; + + env_end = envp; + while (*env_end++) + ; + auxv = (struct wld_auxv *)env_end; + + auxv_end = auxv; + while ((auxv_end++)->a_type != AT_NULL) + ; + + outinfo->stack = stack; + outinfo->argc = argc; + outinfo->argv = argv; + outinfo->envp = envp; + outinfo->auxv = auxv; + outinfo->auxv_end = auxv_end; +} + +static char *stackargs_getenv( const struct stackarg_info *info, const char *name ) +{ + char **envp = info->envp; + size_t namelen = wld_strlen( name ); + + while (*envp) + { + if (wld_strncmp( *envp, name, namelen ) == 0 && + (*envp)[namelen] == '=') return *envp + namelen + 1; + envp++; + } + return NULL; +} + +static void stackargs_shift_args( struct stackarg_info *info, int num_args ) +{ + info->stack = (char **)info->stack + num_args; + info->argc -= num_args; + info->argv = (char **)info->stack + 1; + + wld_memset( info->stack, 0, sizeof(char *) ); + /* Don't coalesce zeroing and setting argc -- we *might* support big endian in the future */ + *(int *)info->stack = info->argc; +} + +static void stackargs_switch_stack( struct stackarg_info *newinfo, struct stackarg_info *oldinfo, void *newstack ) +{ + unsigned long delta = (unsigned long)newstack - (unsigned long)oldinfo->stack; + + /* NOTE it is legal that newinfo == oldinfo */ + newinfo->stack = newstack; + newinfo->argc = oldinfo->argc; + newinfo->argv = (void *)((unsigned long)oldinfo->argv + delta); + newinfo->envp = (void *)((unsigned long)oldinfo->envp + delta); + newinfo->auxv = (void *)((unsigned long)oldinfo->auxv + delta); + newinfo->auxv_end = (void *)((unsigned long)oldinfo->auxv_end + delta); +} + /* * set_auxiliary_values * * Set the new auxiliary values */ -static void set_auxiliary_values( struct wld_auxv *av, const struct wld_auxv 
*new_av, - const struct wld_auxv *delete_av, void **stack ) +static void set_auxiliary_values( struct preloader_state *state, + const struct wld_auxv *new_av, + const struct wld_auxv *delete_av ) { - int i, j, av_count = 0, new_count = 0, delete_count = 0; - char *src, *dst; - - /* count how many aux values we have already */ - while (av[av_count].a_type != AT_NULL) av_count++; + size_t i, new_count = 0, delete_count = 0; + unsigned long dst; + struct wld_auxv *avpd, *avps, *avp; + int is_deleted;
/* delete unwanted values */ - for (j = 0; delete_av[j].a_type != AT_NULL; j++) + for (avps = avpd = state->s.auxv; avps + 1 != state->s.auxv_end; avps++) { - for (i = 0; i < av_count; i++) if (av[i].a_type == delete_av[j].a_type) + is_deleted = 0; + for (i = 0; delete_av[i].a_type != AT_NULL; i++) + { + if (avps->a_type == delete_av[i].a_type) + { + is_deleted = 1; + break; + } + } + if (is_deleted) { - av[i].a_type = av[av_count-1].a_type; - av[i].a_un.a_val = av[av_count-1].a_un.a_val; - av[--av_count].a_type = AT_NULL; delete_count++; - break; + continue; } + if (avpd != avps) + { + avpd->a_type = avps->a_type; + avpd->a_un.a_val = avps->a_un.a_val; + } + avpd++; } + avpd->a_type = AT_NULL; + avpd->a_un.a_val = 0; + state->s.auxv_end = avpd + 1;
/* count how many values we have in new_av that aren't in av */ - for (j = 0; new_av[j].a_type != AT_NULL; j++) + for (i = 0; new_av[i].a_type != AT_NULL; i++) { - for (i = 0; i < av_count; i++) if (av[i].a_type == new_av[j].a_type) break; - if (i == av_count) new_count++; + for (avp = state->s.auxv; avp + 1 != state->s.auxv_end; avp++) if (avp->a_type == new_av[i].a_type) break; + if (avp + 1 == state->s.auxv_end) new_count++; }
- src = (char *)*stack; - dst = src - (new_count - delete_count) * sizeof(*av); - dst = (char *)((unsigned long)dst & ~15); - if (dst < src) /* need to make room for the extra values */ - { - int len = (char *)(av + av_count + 1) - src; - for (i = 0; i < len; i++) dst[i] = src[i]; - } - else if (dst > src) /* get rid of unused values */ - { - int len = (char *)(av + av_count + 1) - src; - for (i = len - 1; i >= 0; i--) dst[i] = src[i]; - } - *stack = dst; - av = (struct wld_auxv *)((char *)av + (dst - src)); + dst = ((unsigned long)state->s.stack - + (new_count - delete_count) * sizeof(struct wld_auxv)) & ~15; + wld_memmove( (void *)dst, state->s.stack, + (unsigned long)state->s.auxv_end - + (unsigned long)state->s.stack ); + stackargs_switch_stack( &state->s, &state->s, (void *)dst );
/* now set the values */ - for (j = 0; new_av[j].a_type != AT_NULL; j++) + for (i = 0; new_av[i].a_type != AT_NULL; i++) { - for (i = 0; i < av_count; i++) if (av[i].a_type == new_av[j].a_type) break; - if (i < av_count) av[i].a_un.a_val = new_av[j].a_un.a_val; + for (avp = state->s.auxv; avp + 1 != state->s.auxv_end; avp++) if (avp->a_type == new_av[i].a_type) break; + if (avp + 1 != state->s.auxv_end) avp->a_un.a_val = new_av[i].a_un.a_val; else { - av[av_count].a_type = new_av[j].a_type; - av[av_count].a_un.a_val = new_av[j].a_un.a_val; - av_count++; + avp->a_type = new_av[i].a_type; + avp->a_un.a_val = new_av[i].a_un.a_val; + state->s.auxv_end++; } } + state->s.auxv_end[-1].a_type = AT_NULL; + state->s.auxv_end[-1].a_un.a_val = 0;
#ifdef DUMP_AUX_INFO wld_printf("New auxiliary info:\n"); - dump_auxiliary( av ); + dump_auxiliary( state->s.auxv ); #endif }
@@ -1369,47 +1483,36 @@ static void set_process_name( int argc, char *argv[] ) */ void* wld_start( void **stack ) { - long i, *pargc; - char **argv, **p; - char *interp, *reserve = NULL; - struct wld_auxv new_av[8], delete_av[3], *av; + long i; + char *interp, *reserve; + struct wld_auxv new_av[8], delete_av[3]; struct wld_link_map main_binary_map, ld_so_map; struct wine_preload_info **wine_main_preload_info; + struct preloader_state state = { 0 };
- pargc = *stack; - argv = (char **)pargc + 1; - if (*pargc < 2) fatal_error( "Usage: %s wine_binary [args]\n", argv[0] ); + parse_stackargs( &state.s, *stack );
- /* skip over the parameters */ - p = argv + *pargc + 1; + if (state.s.argc < 2) fatal_error( "Usage: %s wine_binary [args]\n", state.s.argv[0] );
- /* skip over the environment */ - while (*p) - { - static const char res[] = "WINEPRELOADRESERVE="; - if (!wld_strncmp( *p, res, sizeof(res)-1 )) reserve = *p + sizeof(res) - 1; - p++; - } - - av = (struct wld_auxv *)(p+1); - page_size = get_auxiliary( av, AT_PAGESZ, 4096 ); + page_size = get_auxiliary( state.s.auxv, AT_PAGESZ, 4096 ); page_mask = page_size - 1;
preloader_start = (char *)_start - ((unsigned long)_start & page_mask); preloader_end = (char *)((unsigned long)(_end + page_mask) & ~page_mask);
#ifdef DUMP_AUX_INFO - wld_printf( "stack = %p\n", *stack ); - for( i = 0; i < *pargc; i++ ) wld_printf("argv[%lx] = %s\n", i, argv[i]); - dump_auxiliary( av ); + wld_printf( "stack = %p\n", state.s.stack ); + for( i = 0; i < state.s.argc; i++ ) wld_printf("argv[%lx] = %s\n", i, state.s.argv[i]); + dump_auxiliary( state.s.auxv ); #endif
/* reserve memory that Wine needs */ + reserve = stackargs_getenv( &state.s, "WINEPRELOADRESERVE" ); if (reserve) preload_reserve( reserve ); for (i = 0; preload_info[i].size; i++) { - if ((char *)av >= (char *)preload_info[i].addr && - (char *)pargc <= (char *)preload_info[i].addr + preload_info[i].size) + if ((char *)state.s.auxv >= (char *)preload_info[i].addr && + (char *)state.s.stack <= (char *)preload_info[i].addr + preload_info[i].size) { remove_preload_range( i ); i--; @@ -1436,7 +1539,7 @@ void* wld_start( void **stack ) wld_mprotect( (char *)0x80000000 - page_size, page_size, PROT_EXEC | PROT_READ );
/* load the main binary */ - map_so_lib( argv[1], &main_binary_map ); + map_so_lib( state.s.argv[1], &main_binary_map );
/* load the ELF interpreter */ interp = (char *)main_binary_map.l_addr + main_binary_map.l_interp; @@ -1453,14 +1556,14 @@ void* wld_start( void **stack ) SET_NEW_AV( 2, AT_PHNUM, main_binary_map.l_phnum ); SET_NEW_AV( 3, AT_PAGESZ, page_size ); SET_NEW_AV( 4, AT_BASE, ld_so_map.l_addr ); - SET_NEW_AV( 5, AT_FLAGS, get_auxiliary( av, AT_FLAGS, 0 ) ); + SET_NEW_AV( 5, AT_FLAGS, get_auxiliary( state.s.auxv, AT_FLAGS, 0 ) ); SET_NEW_AV( 6, AT_ENTRY, main_binary_map.l_entry ); SET_NEW_AV( 7, AT_NULL, 0 ); #undef SET_NEW_AV
i = 0; /* delete sysinfo values if addresses conflict */ - if (is_in_preload_range( av, AT_SYSINFO ) || is_in_preload_range( av, AT_SYSINFO_EHDR )) + if (is_in_preload_range( state.s.auxv, AT_SYSINFO ) || is_in_preload_range( state.s.auxv, AT_SYSINFO_EHDR )) { delete_av[i++].a_type = AT_SYSINFO; delete_av[i++].a_type = AT_SYSINFO_EHDR; @@ -1468,14 +1571,13 @@ void* wld_start( void **stack ) delete_av[i].a_type = AT_NULL;
/* get rid of first argument */ - set_process_name( *pargc, argv ); - pargc[1] = pargc[0] - 1; - *stack = pargc + 1; + set_process_name( state.s.argc, state.s.argv ); + stackargs_shift_args( &state.s, 1 );
- set_auxiliary_values( av, new_av, delete_av, stack ); + set_auxiliary_values( &state, new_av, delete_av );
#ifdef DUMP_AUX_INFO - wld_printf("new stack = %p\n", *stack); + wld_printf("new stack = %p\n", state.s.stack); wld_printf("jumping to %p\n", (void *)ld_so_map.l_entry); #endif #ifdef DUMP_MAPS @@ -1490,6 +1592,7 @@ void* wld_start( void **stack ) } #endif
+ *stack = state.s.stack; return (void *)ld_so_map.l_entry; }
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 56 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 17 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 446e2f0e239..54a8b8bac2f 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -68,6 +68,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <limits.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -715,6 +716,34 @@ static inline void *wld_memmove( void *dest, const void *src, size_t len ) return dest; }
+static inline unsigned long parse_ul( const char *nptr, char **endptr, unsigned int radix, int *overflow ) +{ + const char *p = nptr; + unsigned long value, thresh; + int ovfl = 0; + + value = 0; + thresh = ULONG_MAX / radix; + for (;;) + { + unsigned int digit; + if (*p >= '0' && *p <= '9') digit = *p - '0'; + else if (*p >= 'a' && *p <= 'z') digit = *p - 'a' + 10; + else if (*p >= 'A' && *p <= 'Z') digit = *p - 'A' + 10; + else break; + if (digit >= radix) break; + if (value > thresh) ovfl = 1; + value *= radix; + if (value > value + digit) ovfl = 1; + value += digit; + p++; + } + + if (endptr) *endptr = (char *)p; + if (overflow) *overflow = ovfl; + return value; +} + /* * wld_printf - just the basics * @@ -1357,29 +1386,22 @@ found: * * Reserve a range specified in string format */ -static void preload_reserve( const char *str ) +static void preload_reserve( char *str ) { - const char *p; + char *p = str; unsigned long result = 0; void *start = NULL, *end = NULL; - int i, first = 1; + int i;
- for (p = str; *p; p++) + result = parse_ul( p, &p, 16, NULL ); + if (*p == '-') { - if (*p >= '0' && *p <= '9') result = result * 16 + *p - '0'; - else if (*p >= 'a' && *p <= 'f') result = result * 16 + *p - 'a' + 10; - else if (*p >= 'A' && *p <= 'F') result = result * 16 + *p - 'A' + 10; - else if (*p == '-') - { - if (!first) goto error; - start = (void *)(result & ~page_mask); - result = 0; - first = 0; - } - else goto error; + start = (void *)(result & ~page_mask); + result = parse_ul( p + 1, &p, 16, NULL ); + if (*p) goto error; + end = (void *)((result + page_mask) & ~page_mask); } - if (!first) end = (void *)((result + page_mask) & ~page_mask); - else if (result) goto error; /* single value '0' is allowed */ + else if (*p || result) goto error; /* single value '0' is allowed */
/* sanity checks */ if (end <= start) start = end = NULL;
Rename is_addr_reserved to find_preload_reserved_area, with the following changes:
- Accept second argument "size" which specifies the size of the address range to test. - Return the index of the matching entry, or -1 if none found.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com ---
Notes: v1 -> v2: handle overflows
loader/preloader.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 54a8b8bac2f..fb22eff5f61 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -1436,17 +1436,22 @@ error: }
/* check if address is in one of the reserved ranges */ -static int is_addr_reserved( const void *addr ) +static int find_preload_reserved_area( const void *addr, size_t size ) { + unsigned long start = (unsigned long)addr; + unsigned long end = (unsigned long)addr + size - 1; int i;
+ if (!size) + return -1; + for (i = 0; preload_info[i].size; i++) { - if ((const char *)addr >= (const char *)preload_info[i].addr && - (const char *)addr < (const char *)preload_info[i].addr + preload_info[i].size) - return 1; + if (end >= (unsigned long)preload_info[i].addr && + start <= (unsigned long)preload_info[i].addr + preload_info[i].size - 1) + return i; } - return 0; + return -1; }
/* remove a range from the preload list */ @@ -1469,7 +1474,7 @@ static int is_in_preload_range( const struct wld_auxv *av, int type ) { while (av->a_type != AT_NULL) { - if (av->a_type == type) return is_addr_reserved( (const void *)av->a_un.a_val ); + if (av->a_type == type) return find_preload_reserved_area( (const void *)av->a_un.a_val, 1 ) >= 0; av++; } return 0; @@ -1557,7 +1562,7 @@ void* wld_start( void **stack )
/* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */ - if (is_addr_reserved( (char *)0x80000000 - page_size )) + if (find_preload_reserved_area( (char *)0x80000000 - page_size, page_size ) >= 0) wld_mprotect( (char *)0x80000000 - page_size, page_size, PROT_EXEC | PROT_READ );
/* load the main binary */
Today, the preloader reserves some predefined address ranges without checking if there are any overlapping virtual memory mappings.
One side effect of this behaviour is that the preloader's ELF EHDR gets unmapped. Note the following overlapping address ranges:
- 0x00110000 - 0x68000000: low memory area (preload_info) - 0x08040000 - 0x08041000: preloader ELF EHDR (x86) - 0x00400000 - 0x00401000: preloader ELF EHDR (AMD64)
In practice, unmapping the preloader ELF EHDR is harmless; this is because the dynamic linker does not recognise the preloader binary.
Make the unmapping behaviour explicit by calling munmap() on the preloader's ELF EHDR.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com ---
Notes: v1 -> v2: fix comparing text segment start against EHDR start
loader/preloader.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/loader/preloader.c b/loader/preloader.c index fb22eff5f61..3f1d4ff7b21 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -223,6 +223,7 @@ struct * then jumps to the address wld_start returns. */ void _start(void); +extern char __executable_start[]; extern char _end[]; __ASM_GLOBAL_FUNC(_start, __ASM_CFI("\t.cfi_undefined %eip\n") @@ -342,6 +343,15 @@ __ASM_GLOBAL_FUNC(wld_mmap, __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") "\tret\n" )
+static inline int wld_munmap( void *addr, size_t len ) +{ + long ret; + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (91 /* SYS_munmap */), "b" (addr), "c" (len) + : "memory" ); + return SYSCALL_RET(ret); +} + static inline int wld_prctl( int code, long arg ) { long ret; @@ -361,6 +371,7 @@ void *thread_data[256]; * then jumps to the address wld_start returns. */ void _start(void); +extern char __executable_start[]; extern char _end[]; __ASM_GLOBAL_FUNC(_start, __ASM_CFI(".cfi_undefined %rip\n\t") @@ -424,6 +435,9 @@ SYSCALL_FUNC( wld_mmap, 9 /* SYS_mmap */ ); int wld_mprotect( const void *addr, size_t len, int prot ); SYSCALL_FUNC( wld_mprotect, 10 /* SYS_mprotect */ );
+int wld_munmap( void *addr, size_t len ); +SYSCALL_FUNC( wld_munmap, 11 /* SYS_munmap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 157 /* SYS_prctl */ );
@@ -450,6 +464,7 @@ void *thread_data[256]; * then jumps to the address wld_start returns. */ void _start(void); +extern char __executable_start[]; extern char _end[]; __ASM_GLOBAL_FUNC(_start, "mov x0, SP\n\t" @@ -530,6 +545,9 @@ SYSCALL_FUNC( wld_mmap, 222 /* SYS_mmap */ ); int wld_mprotect( const void *addr, size_t len, int prot ); SYSCALL_FUNC( wld_mprotect, 226 /* SYS_mprotect */ );
+int wld_munmap( void *addr, size_t len ); +SYSCALL_FUNC( wld_munmap, 215 /* SYS_munmap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 167 /* SYS_prctl */ );
@@ -556,6 +574,7 @@ void *thread_data[256]; * then jumps to the address wld_start returns. */ void _start(void); +extern char __executable_start[]; extern char _end[]; __ASM_GLOBAL_FUNC(_start, "mov r0, sp\n\t" @@ -628,6 +647,9 @@ void *wld_mmap( void *start, size_t len, int prot, int flags, int fd, off_t offs int wld_mprotect( const void *addr, size_t len, int prot ); SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ );
+int wld_munmap( void *addr, size_t len ); +SYSCALL_FUNC( wld_munmap, 91 /* SYS_munmap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ );
@@ -1527,6 +1549,14 @@ void* wld_start( void **stack ) preloader_start = (char *)_start - ((unsigned long)_start & page_mask); preloader_end = (char *)((unsigned long)(_end + page_mask) & ~page_mask);
+ if ((unsigned long)preloader_start >= (unsigned long)__executable_start + page_size) + { + /* Unmap preloader's ELF EHDR */ + wld_munmap( __executable_start, + ((unsigned long)preloader_start - + (unsigned long)__executable_start) & ~page_mask ); + } + #ifdef DUMP_AUX_INFO wld_printf( "stack = %p\n", state.s.stack ); for( i = 0; i < state.s.argc; i++ ) wld_printf("argv[%lx] = %s\n", i, state.s.argv[i]);
Today, the preloader makes no attempt to avoid unmapping existing memory mappings except the initial stack. This results in irrevocably unmapping some useful preallocated memory areas, such as vDSO.
Fix this by reading /proc/self/maps for existing VMAs, and splitting mmap() calls to avoid erasing existing memory mappings.
Note that MAP_FIXED_NOREPLACE is not suitable for this kind of job: it fails entirely if there exist *any* overlapping memory mappings.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com ---
Notes: v1 -> v2: - linebuffer_init() - add comment on subtracting 1 from ->limit - linebuffer_getline() - fix typo in memmove size - parse_maps_line() - use -page_size for max page address instead of ULONG_MAX - lookup_vma_entry() - skip forward if mid->end == address in binary search - free_vma_list() - use NULL instead of 0 - alloc_scan_vma() - use page_size instead of hard-coded 4096 - use -1 instead of MAP_FAILED macro for consistency - map_reserve_preload_ranges() - compute exclude region from stackarg_info instead of directly receiving it - make agnostic to pointer comparison signedness - make explicit one-byte padding before and after padding region - handle potential off-by-one overflow
loader/preloader.c | 332 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 309 insertions(+), 23 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 3f1d4ff7b21..6a3ac41ad7c 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -180,6 +180,31 @@ struct preloader_state struct stackarg_info s; };
+struct linebuffer +{ + char *base; + char *limit; + char *head; + char *tail; + int overflow; +}; + +struct vma_area +{ + unsigned long start; + unsigned long end; +}; + +struct vma_area_list +{ + struct vma_area *base; + struct vma_area *list_end; + struct vma_area *alloc_end; +}; + +#define FOREACH_VMA(list, item) \ + for ((item) = (list)->base; (item) != (list)->list_end; (item)++) + /* * The __bb_init_func is an empty function only called when file is * compiled with gcc flags "-fprofile-arcs -ftest-coverage". This @@ -738,6 +763,17 @@ static inline void *wld_memmove( void *dest, const void *src, size_t len ) return dest; }
+static inline void *wld_memchr( const void *mem, int val, size_t len ) +{ + const unsigned char *ptr = mem, *end = (const unsigned char *)ptr + len; + + for (ptr = mem; ptr != end; ptr++) + if (*ptr == (unsigned char)val) + return (void *)ptr; + + return NULL; +} + static inline unsigned long parse_ul( const char *nptr, char **endptr, unsigned int radix, int *overflow ) { const char *p = nptr; @@ -1522,6 +1558,273 @@ static void set_process_name( int argc, char *argv[] ) for (i = 1; i < argc; i++) argv[i] -= off; }
+static void linebuffer_init( struct linebuffer *lbuf, char *base, size_t len ) +{ + lbuf->base = base; + lbuf->limit = base + (len - 1); /* extra NULL byte */ + lbuf->head = base; + lbuf->tail = base; + lbuf->overflow = 0; +} + +static char *linebuffer_getline( struct linebuffer *lbuf, char delim ) +{ + char *lnp, *line; + + while ((lnp = wld_memchr( lbuf->tail, delim, lbuf->head - lbuf->tail ))) + { + line = lbuf->tail; + lbuf->tail = lnp + 1; + if (!lbuf->overflow) + { + *lnp = '\0'; + return line; + } + lbuf->overflow = 0; + } + + if (lbuf->base == lbuf->tail) + { + if (lbuf->head == lbuf->limit) + { + line = lbuf->tail; + lbuf->tail = lbuf->head; + lbuf->overflow = 1; + *lbuf->head = '\0'; + return line; + } + } + else wld_memmove( lbuf->base, lbuf->tail, lbuf->head - lbuf->tail); + lbuf->head -= lbuf->tail - lbuf->base; + lbuf->tail = lbuf->base; + + return NULL; +} + +static int parse_maps_line( struct vma_area *entry, char *line ) +{ + struct vma_area item = { 0 }; + char *ptr = line; + int overflow; + + item.start = parse_ul( ptr, &ptr, 16, &overflow ); + if (overflow) return -1; + if (*ptr != '-') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + item.end = parse_ul( ptr, &ptr, 16, &overflow ); + if (overflow) item.end = -page_size; + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + if (item.start >= item.end) return -1; + + if (*ptr != 'r' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + if (*ptr != 'w' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + if (*ptr != 'x' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + if (*ptr != 's' && *ptr != 'p') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + parse_ul( ptr, &ptr, 16, NULL ); + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + parse_ul( ptr, &ptr, 
16, NULL ); + if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + parse_ul( ptr, &ptr, 16, NULL ); + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + parse_ul( ptr, &ptr, 16, NULL ); + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + *entry = item; + return 0; +} + +static struct vma_area *lookup_vma_entry( const struct vma_area_list *list, unsigned long address ) +{ + const struct vma_area *left = list->base, *right = list->list_end, *mid; + while (left < right) + { + mid = left + (right - left) / 2; + if (mid->end <= address) left = mid + 1; + else right = mid; + } + return (struct vma_area *)left; +} + +static int map_reserve_range( void *addr, size_t size ) +{ + if (addr == (void *)-1 || + wld_mmap( addr, size, PROT_NONE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0) != addr) + return -1; + return 0; +} + +static int map_reserve_unmapped_range( const struct vma_area_list *list, void *addr, size_t size ) +{ + unsigned long range_start = (unsigned long)addr, + range_end = (unsigned long)addr + size; + const struct vma_area *start, *item; + unsigned long last_addr = range_start; + + start = lookup_vma_entry( list, range_start ); + for (item = start; item != list->list_end && item->start < range_end; item++) + { + if (item->start > last_addr && + map_reserve_range( (void *)last_addr, item->start - last_addr ) < 0) + goto fail; + last_addr = item->end; + } + + if (range_end > last_addr && + map_reserve_range( (void *)last_addr, range_end - last_addr ) < 0) + goto fail; + return 0; + +fail: + while (item != start) + { + item--; + last_addr = item == start ? 
range_start : item[-1].end; + if (item->start > last_addr) + wld_munmap( (void *)last_addr, item->start - last_addr ); + } + return -1; +} + +static void insert_vma_entry( struct vma_area_list *list, const struct vma_area *item ) +{ + struct vma_area *left = list->base, *right, *mid; + + if (list->base < list->list_end) + { + right = list->list_end; + mid = right - 1; /* start search from end */ + do + { + if (mid->start < item->start) left = mid + 1; + else right = mid; + mid = left + (right - left) / 2; + } while (mid < right); + } + wld_memmove(left + 1, left, (list->list_end - left) * sizeof(*left)); + wld_memmove(left, item, sizeof(*item)); + list->list_end++; + return; +} + +static void scan_vma( struct vma_area_list *list, size_t *act_count ) +{ + int fd; + size_t n = 0; + ssize_t nread; + struct linebuffer lbuf; + char buffer[80 + PATH_MAX], *line; + struct vma_area item; + + fd = wld_open( "/proc/self/maps", O_RDONLY ); + if (fd == -1) fatal_error( "could not open /proc/self/maps\n" ); + + linebuffer_init(&lbuf, buffer, sizeof(buffer)); + for (;;) + { + nread = wld_read( fd, lbuf.head, lbuf.limit - lbuf.head ); + if (nread < 0) fatal_error( "could not read /proc/self/maps\n" ); + if (nread == 0) break; + lbuf.head += nread; + + while ((line = linebuffer_getline( &lbuf, '\n' ))) + { + if (parse_maps_line( &item, line ) >= 0) + { + if (list->list_end < list->alloc_end) insert_vma_entry( list, &item ); + n++; + } + } + } + + wld_close(fd); + *act_count = n; +} + +static void free_vma_list( struct vma_area_list *list ) +{ + if (list->base) + wld_munmap( list->base, + (unsigned char *)list->alloc_end - (unsigned char *)list->base ); + list->base = NULL; + list->list_end = NULL; + list->alloc_end = NULL; +} + +static void alloc_scan_vma( struct vma_area_list *listp ) +{ + size_t max_count = page_size / sizeof(struct vma_area); + struct vma_area_list vma_list; + + for (;;) + { + vma_list.base = wld_mmap( NULL, sizeof(struct vma_area) * max_count, + PROT_READ | PROT_WRITE, 
MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0 ); + if (vma_list.base == (struct vma_area *)-1) + fatal_error( "could not allocate memory for VMA list\n"); + vma_list.list_end = vma_list.base; + vma_list.alloc_end = vma_list.base + max_count; + + scan_vma( &vma_list, &max_count ); + if (vma_list.list_end - vma_list.base == max_count) + { + wld_memmove(listp, &vma_list, sizeof(*listp)); + break; + } + + free_vma_list( &vma_list ); + } +} + +static void map_reserve_preload_ranges( const struct vma_area_list *vma_list, + const struct stackarg_info *stackinfo ) +{ + size_t i; + unsigned long exclude_start = (unsigned long)stackinfo->stack - 1; + unsigned long exclude_end = (unsigned long)stackinfo->auxv + 1; + + for (i = 0; preload_info[i].size; i++) + { + if (exclude_end > (unsigned long)preload_info[i].addr && + exclude_start <= (unsigned long)preload_info[i].addr + preload_info[i].size - 1) + { + remove_preload_range( i ); + i--; + } + else if (map_reserve_unmapped_range( vma_list, preload_info[i].addr, preload_info[i].size ) < 0) + { + /* don't warn for low 64k */ + if (preload_info[i].addr >= (void *)0x10000 +#ifdef __aarch64__ + && preload_info[i].addr < (void *)0x7fffffffff /* ARM64 address space might end here*/ +#endif + ) + wld_printf( "preloader: Warning: failed to reserve range %p-%p\n", + preload_info[i].addr, (char *)preload_info[i].addr + preload_info[i].size ); + remove_preload_range( i ); + i--; + } + } +} +
/* * wld_start @@ -1538,6 +1841,7 @@ void* wld_start( void **stack ) struct wld_link_map main_binary_map, ld_so_map; struct wine_preload_info **wine_main_preload_info; struct preloader_state state = { 0 }; + struct vma_area_list vma_list = { NULL };
parse_stackargs( &state.s, *stack );
@@ -1566,29 +1870,9 @@ void* wld_start( void **stack ) /* reserve memory that Wine needs */ reserve = stackargs_getenv( &state.s, "WINEPRELOADRESERVE" ); if (reserve) preload_reserve( reserve ); - for (i = 0; preload_info[i].size; i++) - { - if ((char *)state.s.auxv >= (char *)preload_info[i].addr && - (char *)state.s.stack <= (char *)preload_info[i].addr + preload_info[i].size) - { - remove_preload_range( i ); - i--; - } - else if (wld_mmap( preload_info[i].addr, preload_info[i].size, PROT_NONE, - MAP_FIXED | MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0 ) == (void *)-1) - { - /* don't warn for low 64k */ - if (preload_info[i].addr >= (void *)0x10000 -#ifdef __aarch64__ - && preload_info[i].addr < (void *)0x7fffffffff /* ARM64 address space might end here*/ -#endif - ) - wld_printf( "preloader: Warning: failed to reserve range %p-%p\n", - preload_info[i].addr, (char *)preload_info[i].addr + preload_info[i].size ); - remove_preload_range( i ); - i--; - } - } + + alloc_scan_vma( &vma_list ); + map_reserve_preload_ranges( &vma_list, &state.s );
/* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */ @@ -1649,6 +1933,8 @@ void* wld_start( void **stack ) } #endif
+ free_vma_list( &vma_list ); + *stack = state.s.stack; return (void *)ld_so_map.l_entry; }
Jinoh Kang jinoh.kang.kr@gmail.com writes:
Today, the preloader makes no attempt to avoid unmapping existing memory mappings except the initial stack. This results in irrevocably unmapping some useful preallocated memory areas, such as vDSO.
Fix this by reading /proc/self/maps for existing VMAs, and splitting mmap() calls to avoid erasing existing memory mappings.
That defeats the purpose of using the preloader. The whole point is to make sure the specified ranges are available. Note that since you don't update the ranges info, the mappings will get erased by Wine later anyway.
On 1/26/22 00:48, Alexandre Julliard wrote:
Jinoh Kang jinoh.kang.kr@gmail.com writes:
Today, the preloader makes no attempt to avoid unmapping existing memory mappings except the initial stack. This results in irrevocably unmapping some useful preallocated memory areas, such as vDSO.
Fix this by reading /proc/self/maps for existing VMAs, and splitting mmap() calls to avoid erasing existing memory mappings.
That defeats the purpose of using the preloader.
The intention was to *incrementally* scrape memory areas for the reserved ranges, relocating any critical areas (vDSO, stack, ...) along the way.
It's also why this change is useless without the subsequent patches, which call map_reserve_preload_ranges again to actually fill out all the gaps previously occupied by vDSO/stack.
The whole point is to make sure the specified ranges are available.
It is. That's why I leave the preload_info ranges for Wine even though I don't actually munmap() those pages.
Note that since you don't update the ranges info, the mappings will get erased by Wine later anyway.
That was exactly what I intended: after jumping to ld.so, we no longer need the code/data from preloader (except stack etc.), so we let Wine unmap them.
I'll make this clear in the next revision.
(Note: the patch does update the ranges info when needed, particularly when it fails to remap() vDSO/stack.)
On 1/26/22 11:52, Jinoh Kang wrote:
On 1/26/22 00:48, Alexandre Julliard wrote:
Jinoh Kang jinoh.kang.kr@gmail.com writes:
Today, the preloader makes no attempt to avoid unmapping existing memory mappings except the initial stack. This results in irrevocably unmapping some useful preallocated memory areas, such as vDSO.
Fix this by reading /proc/self/maps for existing VMAs, and splitting mmap() calls to avoid erasing existing memory mappings.
That defeats the purpose of using the preloader.
The intention was to *incrementally* scrape memory areas for the reserved ranges, relocating any critical areas (vDSO, stack, ...) along the way.
It's also why this change is useless without the subsequent patches, which calls map_reserve_preload_ranges again to actually fill out all the gaps previously occupied by vDSO/stack.
The whole point is to make sure the specified ranges are available.
It is. That's why I leave the preload_info ranges for Wine even though I don't actually munmap() those pages.
Note that since you don't update the ranges info, the mappings will get erased by Wine later anyway.
That was exactly what I intended: after jumping to ld.so, we no longer need the code/data from preloader (except stack etc.), so we let Wine unmap them.
Also note that munmap()-ping preloader code and data from the preloader itself is unsafe, hence the commit message. This is why I'm deferring the actual unmapping to Wine itself.
I'll make this clear in the next revision.
(Note: the patch does update the ranges info when needed, particularly when it fails to remap() vDSO/stack.)
On 1/26/22 11:52, Jinoh Kang wrote:
On 1/26/22 00:48, Alexandre Julliard wrote:
Jinoh Kang jinoh.kang.kr@gmail.com writes:
Today, the preloader makes no attempt to avoid unmapping existing memory mappings except the initial stack. This results in irrevocably unmapping some useful preallocated memory areas, such as vDSO.
Fix this by reading /proc/self/maps for existing VMAs, and splitting mmap() calls to avoid erasing existing memory mappings.
That defeats the purpose of using the preloader.
The intention was to *incrementally* scrape memory areas for the reserved ranges, relocating any critical areas (vDSO, stack, ...) along the way.
To clarify, the purpose of the preloader is to reserve some predefined (fixed) virtual memory ranges so that it's safe for Wine to use. It does so in the following manner:
1. It reserves the memory ranges so that future mmap() operations will not touch them. 2. It removes all existing references (from e.g. auxv) to the memory ranges.
Note that if the range is already mapped, we can skip (1) and only perform (2). We can forcibly do (1) too, but there are the following drawbacks:
- There's no point in re-reserving already occupied memory range. The OS will not reuse them for other allocations anyway, especially since we also do (2).
- It might actually be harmful if the memory range coincides with memory that is being actively used (e.g. preloader code/data, stack, etc.).
Since Wine does not distinguish between normally mapped pages (e.g. preloader code/data stack) and explicitly reserved pages (PROT_NONE), we can leave the existing memory mappings as-is. After all, (2) is what is actually important.
It's also why this change is useless without the subsequent patches, which calls map_reserve_preload_ranges again to actually fill out all the gaps previously occupied by vDSO/stack.
The whole point is to make sure the specified ranges are available.
It is. That's why I leave the preload_info ranges for Wine even though I don't actually munmap() those pages.
Note that since you don't update the ranges info, the mappings will get erased by Wine later anyway.
That was exactly what I intended: after jumping to ld.so, we no longer need the code/data from preloader (except stack etc.), so we let Wine unmap them.
I'll make this clear in the next revision.
(Note: the patch does update the ranges info when needed, particularly when it fails to remap() vDSO/stack.)
Jinoh Kang jinoh.kang.kr@gmail.com writes:
On 1/26/22 00:48, Alexandre Julliard wrote:
Jinoh Kang jinoh.kang.kr@gmail.com writes:
Today, the preloader makes no attempt to avoid unmapping existing memory mappings except the initial stack. This results in irrevocably unmapping some useful preallocated memory areas, such as vDSO.
Fix this by reading /proc/self/maps for existing VMAs, and splitting mmap() calls to avoid erasing existing memory mappings.
That defeats the purpose of using the preloader.
The intention was to *incrementally* scrape memory areas for the reserved ranges, relocating any critical areas (vDSO, stack, ...) along the way.
It's also why this change is useless without the subsequent patches, which calls map_reserve_preload_ranges again to actually fill out all the gaps previously occupied by vDSO/stack.
I don't see the point. If you want to remap vDSO you can do that first, and then reserve the full range. You don't need all that complexity.
And as general advice for your patches, please try to avoid changing things that don't need changing, or adding infrastructure that isn't needed. It will make it easier to see the actual changes.
On 1/26/22 18:06, Alexandre Julliard wrote:
Jinoh Kang jinoh.kang.kr@gmail.com writes:
On 1/26/22 00:48, Alexandre Julliard wrote:
Jinoh Kang jinoh.kang.kr@gmail.com writes:
Today, the preloader makes no attempt to avoid unmapping existing memory mappings except the initial stack. This results in irrevocably unmapping some useful preallocated memory areas, such as vDSO.
Fix this by reading /proc/self/maps for existing VMAs, and splitting mmap() calls to avoid erasing existing memory mappings.
That defeats the purpose of using the preloader.
The intention was to *incrementally* scrape memory areas for the reserved ranges, relocating any critical areas (vDSO, stack, ...) along the way.
It's also why this change is useless without the subsequent patches, which calls map_reserve_preload_ranges again to actually fill out all the gaps previously occupied by vDSO/stack.
I don't see the point. If you want to remap vDSO you can do that first, and then reserve the full range. You don't need all that complexity.
If we remap vDSO _without_ reserving memory first, then the kernel may end up choosing a reserved address for the new vDSO address, especially on 32-bit.
We can implement the address allocation algorithm from scratch avoiding the preload area explicitly ourselves, but I suppose it would result in even more complexity. This is also why we reserve those ranges in the first place -- to let OS pick the address for us.
And as general advice for your patches, please try to avoid changing things that don't need changing, or adding infrastructure that isn't needed. It will make it easier to see the actual changes.
My apologies for complicating the review work. I'll try to separate such patches into another series, or omit them entirely if there's really no need for them.
Perhaps there was something that I was thinking wrong about along the way. In that case, I'll try my best to explain my motivation and/or assumption behind those patches. I'd like to not repeat the same mistake again, so I'd be happy to learn about what went wrong (so that I can correct it). Please feel free to ignore them if you wish, though.
- loader: Use long instead of int for syscall return type in i386 code.
I was planning to add more syscalls, and found that i386 code used "int" for syscall returns. My assumption was that it was a style issue (presumably a 32-bit-era Wine leftover) that I was expected to address _before_ I could add more inline asm blocks.
[The reason why I found "int" confusing was that I usually read "long" as "machine register-width integer," and "int" as "integer that either only needs to hold small values (e.g. exit code) _or_ has to be 32-bit for some reason." Granted int = long on ILP32, but someone looking at the code might not be able to immediately verify that the type is correct unless they also consider that "int $0x80" is for the i386 ABI (or that the code region is guarded by ifdef __i386__), and i386 is a 32-bit architecture. I supposed that would be extra cognitive load for someone reviewing/auditing the code.]
Perhaps my mistake here was one of the following: 1. The code style wasn't an issue at all. I (incorrectly) assumed that the code style was a general consensus among systems programmers. 2. The code style was indeed a problem, but it wasn't really required to fix them in the *same* patch series. 3. The style fix was required, but it came too early in the series.
- loader: Remove GCC <=4.x EBX register spilling workaround for i386.
Similar to above, but debugging related. Again I was adding asm blocks around, so I felt it obliged to fix it before adding more code.
- loader: Enable dumping additional vectors in dump_auxiliary.
This is not a critical patch, but merely a debugging aid. Since it was obvious from the subject, I supposed it wouldn't hurt to include it towards the end of the series. I agree it would certainly have been better off as a separate patch, however.
Meanwhile, following are patches that I deem necessary and not gratuitous:
- loader: Refactor argv/envp/auxv management.
This was necessary because I had to pass around the pointers to stack objects a lot. Examples include getenv() for letting user control remapping behaviour via env vars, and stack relocation (which requires shifting all the argv/envp/auxv pointers). If the pointer variables were not in one place, the latter would make the code a lot hairy (by passing around all the scattered stack pointers) and unmaintainable.
- loader: Refactor number parsing to own function.
Number parsing was only for WINEPRELOADRESERVE. Now I need it for parsing /proc/self/maps as well.
I also looked through all of my past patchsets, and here's my thoughts about them:
- kernel32/tests: Test module refcounting with forwarded exports. => You have pointed out that the IATGAS hack is ugly (which is true). Perhaps this one counts as an "unnecessary infrastructure."
As for the justification of a new DLL (forward4), it was particularly hard to find a concrete example that exactly suits the tests' needs.
1. The DLL has to forward calls to another DLL *but* must not actually import from it. This excludes DLLs like kernel32 which both forwards to *and* imports from kernelbase. 2. The above condition must hold for all Windows versions being tested *and* in Wine. 3. The DLL must not have been already loaded (either directly or indirectly via dependencies) in the process. This excludes DLLs like kernel32, advapi, user32, gdi32, shell32, setupapi, userenv, and etc.
The most recent iteration of this patchset uses ICMP/IPHLPAPI, but I'm still not 100% sure about the compatibility issue. An alternative would be to keep forward4 and modify winebuild so that TESTDLLs can import from other TESTDLLs, but it seemed like an overkill.
- winedbg(gdbproxy): don't misbehave on module names w/ special characters; modernise => Patchset does contain a lot of refactoring work, but I tried to justify them in the commit message body to my best.
- sock_recv() fix series => I attempted to minimize extra changes and/or infrastructure, but it might not have been perfect. In case some non-obvious change was needed, I tried to justify them in the patch message body. Perhaps there are other ways to implement them I couldn't think of.
Meanwhile following patches are ostensibly gratuitous in nature (it might or might not be useful; it's up to your decision):
- winedbg: use heap allocation for module filenames in handle_debug_event. => May seem gratuitous, but fixes module filenames being truncated over MAX_PATH. - server: Allow skipping debug handle retrieval in get_process_debug_info. => Functionality may be gratuitous, but may closely match Windows NT kernel more. Practically untestable (Wine lacks needed infrastructure) for now, so up to the maintainer's decision. - ntdll: Implement NtSetInformationVirtualMemory. => Functionality may be gratuitous or maybe some apps need it, up to the maintainer's decision. - ntdll: Implement __fastfail(). => Functionality may be gratuitous or maybe some apps need it, up to the maintainer's decision. - ntdll: Make syscall dispatcher properly restore X16 and X17 in ARM64. => Completely up to the maintainer's decision. - ws2_32/tests: Add order-agnostic check in test_simultaneous_async_recv. => Completely up to the maintainer's decision.
Again, thanks a lot!
Jinoh Kang jinoh.kang.kr@gmail.com writes:
loader: Use long instead of int for syscall return type in i386 code.
I was planning to add more syscalls, and found that i386 code used "int" for syscall returns. My assumption was that it was a style issue (presumably a 32-bit-era Wine leftover) that I was expected to address _before_ I could add more inline asm blocks.
[The reason why I found "int" confusing was that I usually read "long" as "machine register-width integer," and "int" as "integer that either only needs to hold small values (e.g. exit code) _or_ has to be 32-bit for some reason." Granted int = long on ILP32, but someone looking at the code might not be able to immediately verify that the type is correct unless they also consider that "int $0x80" is for the i386 ABI (or that the code region is guarded by ifdef __i386__), and i386 is a 32-bit architecture. I supposed that would be extra cognitive load for someone reviewing/auditing the code.]
"long" doesn't mean pointer size in Win32, and since it differs between Win32 and Unix it's confusing, so we try to avoid it as much as possible. Here it's obviously safe, but that still makes it more confusing than "int", not less.
loader: Remove GCC <=4.x EBX register spilling workaround for i386.
Similar to above, but debugging related. Again I was adding asm blocks around, so I felt it obliged to fix it before adding more code.
I'm not convinced that this is safe, did you test on a really old gcc?
Meanwhile, following are patches that I deem necessary and not gratuitous:
loader: Refactor argv/envp/auxv management.
This was necessary because I had to pass around the pointers to stack objects a lot. Examples include getenv() for letting user control remapping behaviour via env vars, and stack relocation (which requires shifting all the argv/envp/auxv pointers). If the pointer variables were not in one place, the latter would make the code a lot hairy (by passing around all the scattered stack pointers) and unmaintainable.
Well, letting user control it through env vars is exactly what I mean with "unneeded infrastructure". No user is going to want to understand or tweak these things.
loader: Refactor number parsing to own function.
Number parsing was only for WINEPRELOADRESERVE. Now I need it for parsing /proc/self/maps as well.
You shouldn't need to parse /proc/self/maps at all.
On 1/27/22 16:37, Alexandre Julliard wrote:
Jinoh Kang jinoh.kang.kr@gmail.com writes:
loader: Use long instead of int for syscall return type in i386 code.
I was planning to add more syscalls, and found that i386 code used "int" for syscall returns. My assumption was that it was a style issue (presumably a 32-bit-era Wine leftover) that I was expected to address _before_ I could add more inline asm blocks.
[The reason why I found "int" confusing was that I usually read "long" as "machine register-width integer," and "int" as "integer that either only needs to hold small values (e.g. exit code) _or_ has to be 32-bit for some reason." Granted int = long on ILP32, but someone looking at the code might not be able to immediately verify that the type is correct unless they also consider that "int $0x80" is for the i386 ABI (or that the code region is guarded by ifdef __i386__), and i386 is a 32-bit architecture. I supposed that would be extra cognitive load for someone reviewing/auditing the code.]
"long" doesn't mean pointer size in Win32, and since it differs between Win32 and Unix it's confusing, so we try to avoid it as much as possible. Here it's obviously safe, but that still makes it more confusing than "int", not less.
Thanks for the comment. It sounds reasonable. Perhaps if I were going for readability, I should have used "uintptr_t" instead.
I'll switch to int for the next iteration.
loader: Remove GCC <=4.x EBX register spilling workaround for i386.
Similar to above, but debugging related. Again I was adding asm blocks around, so I felt it obliged to fix it before adding more code.
I'm not convinced that this is safe, did you test on a really old gcc?
Yes, I did. See:
https://godbolt.org/z/nq1T1rr93 (gcc -m32 -O2 -fno-PIC)
versus:
https://godbolt.org/z/8ebsK7Mzx (gcc -m32 -O2 -fPIC)
Note that using EBX works flawlessly if PIC is disabled (which is the case for the preloader).
Furthermore I have verified that the patch works on CentOS 7 with GCC 4.8.5:
$ gcc --version gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44) Copyright (C) 2015 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$ WINEPRELOADREMAPVDSO=always WINEPRELOADREMAPSTACK=always strace -e trace=mremap ./wine winecfg --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=23970, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=23971, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=23972, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=23973, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- strace: [ Process PID=23969 runs in 32 bit mode. ] mremap(0xf127d000, 12288, 12288, MREMAP_MAYMOVE|MREMAP_FIXED, 0xf1278000) = 0xf1278000 mremap(0xf1280000, 4096, 4096, MREMAP_MAYMOVE|MREMAP_FIXED, 0xf127b000) = 0xf127b000 --- SIGIO {si_signo=SIGIO, si_code=SI_USER, si_pid=23969, si_uid=1000} --- --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=23974, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=23976, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=23978, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- --- SIGUSR1 {si_signo=SIGUSR1, si_code=SI_TKILL, si_pid=23975, si_uid=1000} --- --- SIGUSR1 {si_signo=SIGUSR1, si_code=SI_TKILL, si_pid=23975, si_uid=1000} --- --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=23980, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- 0044:err:ole:start_rpcss Failed to open service manager 003c:fixme:imm:ImeSetActiveContext (0x25ae88, 1): stub 003c:fixme:imm:ImmReleaseContext (00010054, 0025AE88): stub 0044:fixme:imm:ImeSetActiveContext (0x257eb0, 0): stub 0044:fixme:imm:ImmReleaseContext (00010020, 00257EB0): stub
Meanwhile, following are patches that I deem necessary and not gratuitous:
loader: Refactor argv/envp/auxv management.
This was necessary because I had to pass around the pointers to stack objects a lot. Examples include getenv() for letting user control remapping behaviour via env vars, and stack relocation (which requires shifting all the argv/envp/auxv pointers). If the pointer variables were not in one place, the latter would make the code a lot hairy (by passing around all the scattered stack pointers) and unmaintainable.
Well, letting user control it through env vars is exactly what I mean with "unneeded infrastructure".
It was intended to avoid regression by gradually adopting the new behaviour.
My plan was to:
1. Hide the new code path behind a flag. 2. Make it default on Wine-Staging (https://bugs.winehq.org/show_bug.cgi?id=52313). 3. Instruct users experiencing a performance problem (intermittent or persistent) to enable the new behaviour and see if it fixes the problem. 4. If enough users have tested the new feature, make it non-configurable.
If you think this process is unnecessary, the environment variables are of course nonsense.
(I've got no replies on the ticket there, and I guessed it might have been better off on the list after all...)
No user is going to want to understand or tweak these things.
Yes. It is intended only for testers and tech/community support, not end-users.
loader: Refactor number parsing to own function.
Number parsing was only for WINEPRELOADRESERVE. Now I need it for parsing /proc/self/maps as well.
You shouldn't need to parse /proc/self/maps at all.
Honestly the part I spent the most time on this patchset was to try to avoid having to parse /proc/self/maps entirely.
The problem is that it's impossible to reliably identify the exact range of the Linux vDSO/vvar mapping without reading /proc/self/maps.
1. vDSO hard-codes vvar's offset relative to vDSO. Therefore, remapping vDSO requires vvar to be *also* remapped as well. However, vvar's size and its location relative to vDSO is *not* guaranteed by ABI, and has changed all the time.
- x86: [vvar] originally resided at a fixed address 0xffffffffff5ff000 (64-bit) [1], but was later changed so that it precedes [vdso] [2]. There, sym_vvar_start is a negative value [3]. text_start is the base address of vDSO, and addr becomes the address of vvar.
- AArch32: [vvar] is a single page and precedes [vdso] [4].
- AArch64: [vvar] is two pages long and precedes [vdso] [5]. Before v5.9, [vvar] was a single page [6].
2. It's very difficult to deduce vDSO and vvar's size and offset relative to each other. Since vvar's symbol does not exist in vDSO's symtab, determining the layout would require parsing vDSO's code.
Also note that CRIU (Checkpoint Restore In Userspace) has maps parsing code just for relocating vDSO [7].
[1] https://lwn.net/Articles/615809/ [2] https://elixir.bootlin.com/linux/v5.16.3/source/arch/x86/entry/vdso/vma.c#L2... [3] https://elixir.bootlin.com/linux/v5.16.3/source/arch/x86/include/asm/vdso.h#... [4] https://elixir.bootlin.com/linux/v5.16.3/source/arch/arm/kernel/vdso.c#L236 [5] https://elixir.bootlin.com/linux/v5.16.3/source/arch/arm64/kernel/vdso.c#L21... [6] https://elixir.bootlin.com/linux/v5.8/source/arch/arm64/kernel/vdso.c#L161 [7] https://github.com/checkpoint-restore/criu/blob/a315774e11b4da1eb36446ae996e...
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 6a3ac41ad7c..eefbcff3469 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -1069,7 +1069,7 @@ static void set_auxiliary_values( struct preloader_state *state, * * Get a field of the auxiliary structure */ -static int get_auxiliary( struct wld_auxv *av, int type, int def_val ) +static ElfW(Addr) get_auxiliary( struct wld_auxv *av, int type, ElfW(Addr) def_val ) { for ( ; av->a_type != AT_NULL; av++) if( av->a_type == type ) return av->a_un.a_val;
Today, the preloader removes the vDSO entries (AT_SYSINFO*) from the auxiliary vector when it conflicts with one of the predefined reserved ranges.
vDSO is a shared object provided by the kernel. Among other things, it provides a mechanism to issue certain system calls without the overhead of switching to the kernel mode.
Without vDSO, libc still works; however, it is expected that some system call functions (e.g. gettimeofday, clock_gettime) will show degraded performance.
Fix this by relocating vDSO to another address (if supported by the kernel) instead of erasing it from auxv entirely.
This behaviour is enabled only when the "WINEPRELOADREMAPVDSO" environment variable is set to "on-conflict". In the future, it could become the default behaviour.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com ---
Notes: v1 -> v2: - s/offset/delta/g - remap_vdso() - significantly improve kernel vdso_mremap() support detection logic - add comments - only modify AT_SYSINFO* if it's in vDSO range - guard against vdso_start + vdso_size overflow - remove erroneous MAP_GROWSDOWN - fix remap_multiple_vmas() when revert = 1 - some refactoring
loader/preloader.c | 460 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 457 insertions(+), 3 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index eefbcff3469..7526a4fcaa4 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -72,6 +72,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <signal.h> #include <sys/mman.h> #ifdef HAVE_SYS_SYSCALL_H # include <sys/syscall.h> @@ -86,6 +87,9 @@ #ifdef HAVE_SYS_LINK_H # include <sys/link.h> #endif +#ifdef HAVE_SYS_UCONTEXT_H +# include <sys/ucontext.h> +#endif
#include "wine/asm.h" #include "main.h" @@ -102,6 +106,11 @@ #ifndef MAP_NORESERVE #define MAP_NORESERVE 0 #endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif + +#define REMAP_TEST_SIG SIGIO /* Any signal GDB doesn't stop on */
static struct wine_preload_info preload_info[] = { @@ -165,6 +174,18 @@ struct wld_auxv } a_un; };
+typedef unsigned long wld_sigset_t[8 / sizeof(unsigned long)]; + +struct wld_sigaction { + /* Prefix all fields since they may collide with macros from libc headers */ + void (*wld_sa_sigaction)(int, siginfo_t *, void *); + unsigned long wld_sa_flags; + void (*wld_sa_restorer)(void); + wld_sigset_t wld_sa_mask; +}; + +#define WLD_SA_SIGINFO 4 + struct stackarg_info { void *stack; @@ -189,10 +210,19 @@ struct linebuffer int overflow; };
+enum vma_type_flags +{ + VMA_NORMAL = 0x01, + VMA_VDSO = 0x02, + VMA_VVAR = 0x04, +}; + struct vma_area { unsigned long start; unsigned long end; + unsigned char type_flags; + unsigned char moved; };
struct vma_area_list @@ -205,6 +235,45 @@ struct vma_area_list #define FOREACH_VMA(list, item) \ for ((item) = (list)->base; (item) != (list)->list_end; (item)++)
+enum remap_policy +{ + REMAP_POLICY_ON_CONFLICT = 0, + REMAP_POLICY_FORCE = 1, + REMAP_POLICY_SKIP = 2, + LAST_REMAP_POLICY, + + REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP, +}; + +struct remap_test_block { + /* The old address range of vDSO or sigpage. Used to test if pages are remapped properly. */ + unsigned long old_mapping_start; + unsigned long old_mapping_size; + + struct vma_area_list *vma_list; + + /* Difference between the base address of the new mapping and the old mapping. + * + * Set to zero if the handler reverted mappings to old state before returning + * in order to safely return when it detects failed remapping. + */ + unsigned long delta; + + /* Set to 1 to indicate that remapping was successfully recognised by the kernel. + * + * If the signal handler is never called (due to e.g. being blocked), it is counted + * as being unsuccessful. + */ + unsigned char is_successful; + + /* Set to 1 to indicate that remapping could not be recognised by the kernel. + * + * If both is_successful and is_failed are set, is_failed takes precedence. + * The flags are intentionally made redundant to detect multiple successive + * invocation of the signal handler due to external signal delivery. */ + unsigned char is_failed; +} remap_test; + /* * The __bb_init_func is an empty function only called when file is * compiled with gcc flags "-fprofile-arcs -ftest-coverage". This @@ -240,6 +309,15 @@ struct unsigned int garbage : 25; } thread_ldt = { -1, (unsigned long)thread_data, 0xfffff, 1, 0, 0, 1, 0, 1, 0 };
+typedef unsigned long wld_old_sigset_t; + +struct wld_old_sigaction { + /* Prefix all fields since they may collide with macros from libc headers */ + void (*wld_sa_sigaction)(int, siginfo_t *, void *); + wld_old_sigset_t wld_sa_mask; + unsigned long wld_sa_flags; + void (*wld_sa_restorer)(void); +};
/* * The _start function is the entry and exit point of this program @@ -377,6 +455,16 @@ static inline int wld_munmap( void *addr, size_t len ) return SYSCALL_RET(ret); }
+static inline void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ) +{ + long ret; + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (163 /* SYS_mremap */), "b" (old_addr), "c" (old_len), + "d" (new_size), "S" (flags), "D" (new_addr) + : "memory" ); + return (void *)SYSCALL_RET(ret); +} + static inline int wld_prctl( int code, long arg ) { long ret; @@ -385,6 +473,64 @@ static inline int wld_prctl( int code, long arg ) return SYSCALL_RET(ret); }
+static void copy_old_sigset(void *dest, const void *src) +{ + /* Avoid aliasing */ + size_t i; + for (i = 0; i < sizeof(wld_old_sigset_t); i++) + *((unsigned char *)dest + i) = *((const unsigned char *)src + i); +} + +static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act ) +{ + long ret; + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (174 /* SYS_rt_sigaction */), "b" (signum), "c" (act), "d" (old_act), "S" (sizeof(act->wld_sa_mask)) + : "memory" ); + if (ret == -38 /* ENOSYS */) { + struct wld_old_sigaction act_buf, old_act_buf, *act_real, *old_act_real; + + if (act) { + act_real = &act_buf; + act_buf.wld_sa_sigaction = act->wld_sa_sigaction; + copy_old_sigset(&act_buf.wld_sa_mask, &act->wld_sa_mask); + act_buf.wld_sa_flags = act->wld_sa_flags; + act_buf.wld_sa_restorer = act->wld_sa_restorer; + } + + if (old_act) old_act_real = &old_act_buf; + + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (67 /* SYS_sigaction */), "b" (signum), "c" (act_real), "d" (old_act_real) + : "memory" ); + + if (old_act && ret >= 0) { + old_act->wld_sa_sigaction = old_act_buf.wld_sa_sigaction; + old_act->wld_sa_flags = old_act_buf.wld_sa_flags; + old_act->wld_sa_restorer = old_act_buf.wld_sa_restorer; + copy_old_sigset(&old_act->wld_sa_mask, &old_act_buf.wld_sa_mask); + } + } + return SYSCALL_RET(ret); +} + +static inline int wld_kill( pid_t pid, int sig ) +{ + long ret; + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (37 /* SYS_kill */), "b" (pid), "c" (sig) + : "memory" /* clobber: signal handler side effects on raise() */ ); + return SYSCALL_RET(ret); +} + +static inline pid_t wld_getpid( void ) +{ + long ret; + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (20 /* SYS_getpid */) ); + return ret; +} + #elif defined(__x86_64__)
void *thread_data[256]; @@ -463,9 +609,15 @@ SYSCALL_FUNC( wld_mprotect, 10 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 11 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 25 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 157 /* SYS_prctl */ );
+pid_t wld_getpid(void); +SYSCALL_NOERR( wld_getpid, 39 /* SYS_getpid */ ); + uid_t wld_getuid(void); SYSCALL_NOERR( wld_getuid, 102 /* SYS_getuid */ );
@@ -573,9 +725,26 @@ SYSCALL_FUNC( wld_mprotect, 226 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 215 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 216 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 167 /* SYS_prctl */ );
+int wld_rt_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act, size_t sigsetsize ); +SYSCALL_FUNC( wld_rt_sigaction, 134 /* SYS_rt_sigaction */ ); + +static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act ) +{ + return wld_rt_sigaction( signum, act, old_act, sizeof(act->wld_sa_mask) ); +} + +int wld_kill( pid_t pid, int sig ); +SYSCALL_FUNC( wld_kill, 129 /* SYS_kill */ ); + +pid_t wld_getpid(void); +SYSCALL_NOERR( wld_getpid, 172 /* SYS_getpid */ ); + uid_t wld_getuid(void); SYSCALL_NOERR( wld_getuid, 174 /* SYS_getuid */ );
@@ -675,9 +844,26 @@ SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 91 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 163 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ );
+int wld_rt_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act, size_t sigsetsize ); +SYSCALL_FUNC( wld_rt_sigaction, 174 /* SYS_rt_sigaction */ ); + +static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act ) +{ + return wld_rt_sigaction( signum, act, old_act, sizeof(act->wld_sa_mask) ); +} + +int wld_kill( pid_t pid, int sig ); +SYSCALL_FUNC( wld_kill, 37 /* SYS_kill */ ); + +pid_t wld_getpid(void); +SYSCALL_NOERR( wld_getpid, 20 /* SYS_getpid */ ); + uid_t wld_getuid(void); SYSCALL_NOERR( wld_getuid, 24 /* SYS_getuid */ );
@@ -1604,6 +1790,7 @@ static char *linebuffer_getline( struct linebuffer *lbuf, char delim ) static int parse_maps_line( struct vma_area *entry, char *line ) { struct vma_area item = { 0 }; + unsigned long dev_maj, dev_min; char *ptr = line; int overflow;
@@ -1634,11 +1821,11 @@ static int parse_maps_line( struct vma_area *entry, char *line ) if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
- parse_ul( ptr, &ptr, 16, NULL ); + dev_maj = parse_ul( ptr, &ptr, 16, NULL ); if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
- parse_ul( ptr, &ptr, 16, NULL ); + dev_min = parse_ul( ptr, &ptr, 16, NULL ); if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
@@ -1646,6 +1833,17 @@ static int parse_maps_line( struct vma_area *entry, char *line ) if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
+ while (*ptr == ' ') + ptr++; + + if (dev_maj == 0 && dev_min == 0) + { + if (wld_strcmp(ptr, "[vdso]") == 0) + item.type_flags |= VMA_VDSO; + else if (wld_strcmp(ptr, "[vvar]") == 0) + item.type_flags |= VMA_VVAR; + } + *entry = item; return 0; } @@ -1724,6 +1922,60 @@ static void insert_vma_entry( struct vma_area_list *list, const struct vma_area return; }
+static int find_vma_envelope_range( const struct vma_area_list *list, int type_mask, unsigned long *startp, unsigned long *sizep ) +{ + const struct vma_area *item; + unsigned long start = ULONG_MAX; + unsigned long end = 0; + + FOREACH_VMA(list, item) + { + if (item->type_flags & type_mask) + { + if (start > item->start) start = item->start; + if (end < item->end) end = item->end; + } + } + + if (start >= end) return -1; + + *startp = start; + *sizep = end - start; + return 0; +} + +static int remap_multiple_vmas( struct vma_area_list *list, unsigned long delta, int type_mask, unsigned char revert ) +{ + struct vma_area *item; + void *old_addr, *expect_addr, *mapped_addr; + size_t size; + + FOREACH_VMA(list, item) + { + if ((item->type_flags & type_mask) && item->moved == revert) + { + if (revert) { + old_addr = (void *)(item->start + delta); + expect_addr = (void *)item->start; + } else { + old_addr = (void *)item->start; + expect_addr = (void *)(item->start + delta); + } + size = item->end - item->start; + mapped_addr = wld_mremap( old_addr, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, expect_addr ); + if (mapped_addr == (void *)-1) return -1; + if (mapped_addr != expect_addr) + { + if (mapped_addr == old_addr) return -1; /* kernel doesn't support MREMAP_FIXED */ + fatal_error( "mremap() returned different address\n" ); + } + item->moved = !revert; + } + } + + return 0; +} + static void scan_vma( struct vma_area_list *list, size_t *act_count ) { int fd; @@ -1794,6 +2046,206 @@ static void alloc_scan_vma( struct vma_area_list *listp ) } }
+static enum remap_policy stackargs_get_remap_policy( const struct stackarg_info *info, const char *name, + enum remap_policy default_policy ) +{ + char *valstr = stackargs_getenv( info, name ), *endptr; + unsigned long valnum; + + if (valstr) { + if (wld_strcmp(valstr, "auto") == 0 || wld_strcmp(valstr, "on-conflict") == 0) + return REMAP_POLICY_ON_CONFLICT; + if (wld_strcmp(valstr, "always") == 0 || wld_strcmp(valstr, "force") == 0) + return REMAP_POLICY_FORCE; + if (wld_strcmp(valstr, "never") == 0 || wld_strcmp(valstr, "skip") == 0) + return REMAP_POLICY_SKIP; + valnum = parse_ul( valstr, &endptr, 10, NULL ); + if (!*endptr && valnum < LAST_REMAP_POLICY) return valnum; + } + + return default_policy; +} + +static int find_remap_area( const struct vma_area_list *vma_list, struct preloader_state *state, + const char *policy_envname, enum remap_policy default_policy, + int type_mask, unsigned long *startp, unsigned long *sizep ) +{ + enum remap_policy policy; + unsigned long start, size; + + if (find_vma_envelope_range( vma_list, type_mask, &start, &size ) < 0) return 0; + + policy = stackargs_get_remap_policy( &state->s, policy_envname, default_policy ); + if (policy == REMAP_POLICY_SKIP) return -1; + if (policy != REMAP_POLICY_FORCE && + find_preload_reserved_area( (void *)start, size ) < 0) return 0; + + *startp = start; + *sizep = size; + return 1; +} + +#ifndef __x86_64__ +static int remap_test_in_old_address_range( unsigned long address ) +{ + return address - remap_test.old_mapping_start < remap_test.old_mapping_size; +} + +static void remap_test_signal_handler( int signum, siginfo_t *sinfo, void *context ) +{ + (void)signum; + (void)sinfo; + (void)context; + + if (remap_test_in_old_address_range((unsigned long)__builtin_return_address(0))) goto fail; + +#ifdef __i386__ + /* test for SYSENTER/SYSEXIT return address (int80_landing_pad) */ + if (remap_test_in_old_address_range(((ucontext_t *)context)->uc_mcontext.gregs[REG_EIP])) goto fail; +#endif + + 
remap_test.is_successful = 1; + return; + +fail: + /* Kernel too old to support remapping. Restore vDSO/sigpage to return safely. */ + if (remap_test.delta) { + if (remap_multiple_vmas( remap_test.vma_list, remap_test.delta, -1, 1 ) < 0) + fatal_error( "Cannot restore remapped VMAs\n" ); + remap_test.delta = 0; + } + + /* Signal handler might be called several times externally, + * so overwrite with the latest status just to be safe. */ + remap_test.is_failed = 1; +} +#endif + +static int test_remap_successful( struct vma_area_list *vma_list, struct preloader_state *state, + unsigned long old_mapping_start, unsigned long old_mapping_size, + unsigned long delta ) +{ +#ifdef __x86_64__ + (void)vma_list; + (void)state; + (void)old_mapping_start; + (void)old_mapping_size; + (void)delta; + + /* x86-64 doesn't use SYSENTER for syscalls, and requires sa_restorer for + * signal handlers. We can safely relocate vDSO without kernel support + * (vdso_mremap). */ + return 0; +#else + struct wld_sigaction sigact; + pid_t pid; + int result = -1; + unsigned long syscall_addr = 0; + + pid = wld_getpid(); + if (pid < 0) fatal_error( "failed to get PID\n" ); + +#ifdef __i386__ + syscall_addr = get_auxiliary( state->s.auxv, AT_SYSINFO, 0 ); + if (syscall_addr - old_mapping_start < old_mapping_size) syscall_addr += delta; +#endif + + remap_test.old_mapping_start = old_mapping_start; + remap_test.old_mapping_size = old_mapping_size; + remap_test.vma_list = vma_list; + remap_test.delta = delta; + remap_test.is_successful = 0; + remap_test.is_failed = 0; + + wld_memset( &sigact, 0, sizeof(sigact) ); + sigact.wld_sa_sigaction = remap_test_signal_handler; + sigact.wld_sa_flags = WLD_SA_SIGINFO; + /* We deliberately skip sa_restorer, since we're trying to get the address + * of the kernel's built-in restorer function. 
*/ + + if (wld_sigaction( REMAP_TEST_SIG, &sigact, &sigact ) < 0) fatal_error( "cannot register test signal handler\n" ); + + /* Critical region below - may race with signal handler */ +#ifdef __i386__ + if (syscall_addr) { + /* Also test __kernel_vsyscall return as well */ + __asm__ __volatile__( "call *%1" + : "=a" (result) : "r" (syscall_addr), "0" (37 /* SYS_kill */), "b" (pid), "c" (REMAP_TEST_SIG) ); + result = SYSCALL_RET(result); + } +#else + syscall_addr = 0; +#endif + if (!syscall_addr) result = wld_kill( pid, REMAP_TEST_SIG ); + /* Critical region above - may race with signal handler */ + + if (wld_sigaction( REMAP_TEST_SIG, &sigact, &sigact ) < 0) fatal_error( "cannot unregister test signal handler\n" ); + if (result == -1) fatal_error( "cannot raise test signal\n" ); + + /* Now that the signal handler can no longer be called, + * we can safely access the result data. */ + if (remap_test.is_failed || !remap_test.is_successful) { + if (remap_test.delta && remap_multiple_vmas( remap_test.vma_list, remap_test.delta, -1, 1 ) < 0) + fatal_error( "Cannot restore remapped VMAs\n" ); + return -1; + } + + return 0; +#endif +} + +static int remap_vdso( struct vma_area_list *vma_list, struct preloader_state *state ) +{ + int result; + unsigned long vdso_start, vdso_size, delta; + void *new_vdso; + struct wld_auxv *auxv; + + result = find_remap_area( vma_list, state, + "WINEPRELOADREMAPVDSO", REMAP_POLICY_DEFAULT_VDSO, + VMA_VDSO | VMA_VVAR, &vdso_start, &vdso_size ); + if (result <= 0) return result; + + new_vdso = wld_mmap( NULL, vdso_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0 ); + if (new_vdso == (void *)-1) return -1; + + delta = (unsigned long)new_vdso - vdso_start; + /* It's easier to undo vvar remapping, so we remap it first. 
*/ + if (remap_multiple_vmas( vma_list, delta, VMA_VVAR, 0 ) < 0 || + remap_multiple_vmas( vma_list, delta, VMA_VDSO, 0 ) < 0) goto remap_restore; + + /* AArch32 may have restorer in vDSO if we're running on an ARM64 kernel. */ + if (test_remap_successful( vma_list, state, vdso_start, vdso_size, delta ) < 0) + { + /* mapping restore done by test_remap_successful */ + return -1; + } + + for (auxv = state->s.auxv; auxv->a_type != AT_NULL; auxv++) + { + switch (auxv->a_type) + { + case AT_SYSINFO: + case AT_SYSINFO_EHDR: + if ((unsigned long)auxv->a_un.a_val - vdso_start < vdso_size) + auxv->a_un.a_val += delta; + break; + } + } + + /* Refresh VMA list */ + free_vma_list( vma_list ); + alloc_scan_vma( vma_list ); + return 1; + +remap_restore: + if (remap_multiple_vmas( vma_list, delta, -1, 1 ) < 0) + fatal_error( "Cannot restore remapped VMAs\n" ); + + return -1; +} + static void map_reserve_preload_ranges( const struct vma_area_list *vma_list, const struct stackarg_info *stackinfo ) { @@ -1874,6 +2326,8 @@ void* wld_start( void **stack ) alloc_scan_vma( &vma_list ); map_reserve_preload_ranges( &vma_list, &state.s );
+ if (remap_vdso( &vma_list, &state ) > 0) map_reserve_preload_ranges( &vma_list, &state.s ); + /* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */ if (find_preload_reserved_area( (char *)0x80000000 - page_size, page_size ) >= 0) @@ -1903,7 +2357,7 @@ void* wld_start( void **stack ) #undef SET_NEW_AV
i = 0; - /* delete sysinfo values if addresses conflict */ + /* delete sysinfo values if addresses conflict and remap failed */ if (is_in_preload_range( state.s.auxv, AT_SYSINFO ) || is_in_preload_range( state.s.auxv, AT_SYSINFO_EHDR )) { delete_av[i++].a_type = AT_SYSINFO;
Today, the preloader makes no attempt to remap the sigpage when it conflicts with reserved addresses. If libc doesn't have its own signal restorer, this results in the inability to return from signal handlers.
Fix this by relocating sigpage to another address whenever possible.
This behaviour is enabled only when the "WINEPRELOADREMAPSIGPAGE" environment variable is set to "on-conflict". In the future, it could become the default behaviour.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com ---
Notes: v1 -> v2: new patch
loader/preloader.c | 67 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 5 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 7526a4fcaa4..ab89daa2972 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -212,9 +212,12 @@ struct linebuffer
enum vma_type_flags { - VMA_NORMAL = 0x01, - VMA_VDSO = 0x02, - VMA_VVAR = 0x04, + VMA_NORMAL = 0x01, + VMA_VDSO = 0x02, + VMA_VVAR = 0x04, +#ifdef __arm__ + VMA_SIGPAGE = 0x08, +#endif };
struct vma_area @@ -242,7 +245,10 @@ enum remap_policy REMAP_POLICY_SKIP = 2, LAST_REMAP_POLICY,
- REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP, + REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP, +#ifdef __arm__ + REMAP_POLICY_DEFAULT_SIGPAGE = REMAP_POLICY_SKIP, +#endif };
struct remap_test_block { @@ -1842,6 +1848,10 @@ static int parse_maps_line( struct vma_area *entry, char *line ) item.type_flags |= VMA_VDSO; else if (wld_strcmp(ptr, "[vvar]") == 0) item.type_flags |= VMA_VVAR; +#ifdef __arm__ + else if (wld_strcmp(ptr, "[sigpage]") == 0) + item.type_flags |= VMA_SIGPAGE; +#endif }
*entry = item; @@ -2246,6 +2256,47 @@ remap_restore: return -1; }
+#ifdef __arm__ +/* sigpage remapping shouldn't really be necessary, since modern libcs + * use their own signal restorer anyway. But better be safe than sorry... + */ +static int remap_sigpage( struct vma_area_list *vma_list, struct preloader_state *state ) +{ + int result; + unsigned long sigpage_start, sigpage_size, delta; + void *new_sigpage; + + result = find_remap_area( vma_list, state, + "WINEPRELOADREMAPSIGPAGE", REMAP_POLICY_DEFAULT_SIGPAGE, + VMA_SIGPAGE, &sigpage_start, &sigpage_size ); + if (result <= 0) return result; + + new_sigpage = wld_mmap( NULL, sigpage_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0 ); + if (new_sigpage == (void *)-1) return -1; + + delta = (unsigned long)new_sigpage - sigpage_start; + if (remap_multiple_vmas( vma_list, delta, VMA_SIGPAGE, 0 ) < 0) goto remap_restore; + + if (test_remap_successful( vma_list, state, sigpage_start, sigpage_size, delta ) < 0) + { + /* mapping restore done by test_remap_successful */ + return -1; + } + + /* Refresh VMA list */ + free_vma_list( vma_list ); + alloc_scan_vma( vma_list ); + return 1; + +remap_restore: + if (remap_multiple_vmas( vma_list, delta, -1, 1 ) < 0) + fatal_error( "Cannot restore remapped VMAs\n" ); + + return -1; +} +#endif + static void map_reserve_preload_ranges( const struct vma_area_list *vma_list, const struct stackarg_info *stackinfo ) { @@ -2294,6 +2345,7 @@ void* wld_start( void **stack ) struct wine_preload_info **wine_main_preload_info; struct preloader_state state = { 0 }; struct vma_area_list vma_list = { NULL }; + int remap_done;
parse_stackargs( &state.s, *stack );
@@ -2326,7 +2378,12 @@ void* wld_start( void **stack ) alloc_scan_vma( &vma_list ); map_reserve_preload_ranges( &vma_list, &state.s );
- if (remap_vdso( &vma_list, &state ) > 0) map_reserve_preload_ranges( &vma_list, &state.s ); + remap_done = 0; + remap_done |= remap_vdso( &vma_list, &state ) > 0; +#ifdef __arm__ + remap_done |= remap_sigpage( &vma_list, &state ) > 0; +#endif + if (remap_done) map_reserve_preload_ranges( &vma_list, &state.s );
/* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */
Today, the preloader abandons reserved address ranges that conflict with the call stack area.
Fix this by attempting to copy the stack somewhere else, and switching to it before entering the ld.so entry point. This way, the preloader does not have to give up the address reservation.
This behaviour is enabled only when the "WINEPRELOADREMAPSTACK" environment variable is set to "on-conflict". In the future, it could become the default behaviour.
Note that changes to argv and envp are *not* visible in /proc/PID/{environ,cmdline} after the stack has been switched, since kernel mm pointer fields are still pointing to the old stack.
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com ---
Notes: v1 -> v2: - s/offset/delta/g - shift VMA_STACK to 0x10 from 0x08 (now taken by VMA_SIGPAGE)
loader/preloader.c | 123 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 104 insertions(+), 19 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index ab89daa2972..69a14c27b91 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -218,6 +218,7 @@ enum vma_type_flags #ifdef __arm__ VMA_SIGPAGE = 0x08, #endif + VMA_STACK = 0x10, };
struct vma_area @@ -249,6 +250,7 @@ enum remap_policy #ifdef __arm__ REMAP_POLICY_DEFAULT_SIGPAGE = REMAP_POLICY_SKIP, #endif + REMAP_POLICY_DEFAULT_STACK = REMAP_POLICY_SKIP, };
struct remap_test_block { @@ -1179,6 +1181,59 @@ static void stackargs_switch_stack( struct stackarg_info *newinfo, struct stacka newinfo->auxv_end = (void *)((unsigned long)oldinfo->auxv_end + delta); }
+static size_t relocate_argvec( char **dest, char **src, size_t count ) +{ + size_t i; + unsigned long delta = (unsigned long)dest - (unsigned long)src; + + for (i = 0; i < count && src[i]; i++) + dest[i] = src[i] + delta; + + dest[i] = 0; + return i; +} + +static size_t relocate_auxvec( struct wld_auxv *dest, struct wld_auxv *src ) +{ + size_t i; + unsigned long delta = (unsigned long)dest - (unsigned long)src; + + for (i = 0; src[i].a_type != AT_NULL; i++) + { + dest[i].a_type = src[i].a_type; + switch (dest[i].a_type) + { + case AT_RANDOM: + case AT_PLATFORM: + case AT_BASE_PLATFORM: + case AT_EXECFN: + if (src[i].a_un.a_val >= (unsigned long)src) + { + dest[i].a_un.a_val = src[i].a_un.a_val + delta; + break; + } + /* fallthrough */ + default: + dest[i].a_un.a_val = src[i].a_un.a_val; + break; + } + } + + return i; +} + +static void copy_stackargs( struct stackarg_info *newinfo, struct stackarg_info *oldinfo, void *newstack, void *newstackend ) +{ + stackargs_switch_stack( newinfo, oldinfo, newstack ); + + *(int *)newstack = *(int *)oldinfo->stack; + relocate_argvec( newinfo->argv, oldinfo->argv, newinfo->envp - newinfo->argv ); + relocate_argvec( newinfo->envp, oldinfo->envp, (char **)newinfo->auxv - newinfo->envp ); + relocate_auxvec( newinfo->auxv, oldinfo->auxv ); + wld_memmove( newinfo->auxv_end, oldinfo->auxv_end, + (unsigned long)newstackend - (unsigned long)newinfo->auxv_end ); +} + /* * set_auxiliary_values * @@ -1986,7 +2041,7 @@ static int remap_multiple_vmas( struct vma_area_list *list, unsigned long delta, return 0; }
-static void scan_vma( struct vma_area_list *list, size_t *act_count ) +static void scan_vma( struct vma_area_list *list, size_t *act_count, void *stack_ptr ) { int fd; size_t n = 0; @@ -2010,6 +2065,9 @@ static void scan_vma( struct vma_area_list *list, size_t *act_count ) { if (parse_maps_line( &item, line ) >= 0) { + if (item.start <= (unsigned long)stack_ptr && + item.end > (unsigned long)stack_ptr) + item.type_flags |= VMA_STACK; if (list->list_end < list->alloc_end) insert_vma_entry( list, &item ); n++; } @@ -2030,7 +2088,7 @@ static void free_vma_list( struct vma_area_list *list ) list->alloc_end = NULL; }
-static void alloc_scan_vma( struct vma_area_list *listp ) +static void alloc_scan_vma( struct vma_area_list *listp, void *stack_ptr ) { size_t max_count = page_size / sizeof(struct vma_area); struct vma_area_list vma_list; @@ -2045,7 +2103,7 @@ static void alloc_scan_vma( struct vma_area_list *listp ) vma_list.list_end = vma_list.base; vma_list.alloc_end = vma_list.base + max_count;
- scan_vma( &vma_list, &max_count ); + scan_vma( &vma_list, &max_count, stack_ptr ); if (vma_list.list_end - vma_list.base == max_count) { wld_memmove(listp, &vma_list, sizeof(*listp)); @@ -2246,7 +2304,7 @@ static int remap_vdso( struct vma_area_list *vma_list, struct preloader_state *s
/* Refresh VMA list */ free_vma_list( vma_list ); - alloc_scan_vma( vma_list ); + alloc_scan_vma( vma_list, state->s.stack ); return 1;
remap_restore: @@ -2286,7 +2344,7 @@ static int remap_sigpage( struct vma_area_list *vma_list, struct preloader_state
/* Refresh VMA list */ free_vma_list( vma_list ); - alloc_scan_vma( vma_list ); + alloc_scan_vma( vma_list, state->s.stack ); return 1;
remap_restore: @@ -2297,22 +2355,48 @@ remap_restore: } #endif
-static void map_reserve_preload_ranges( const struct vma_area_list *vma_list, - const struct stackarg_info *stackinfo ) +static int remap_stack( struct vma_area_list *vma_list, struct preloader_state *state ) +{ + enum remap_policy policy; + unsigned long stack_start, stack_size; + struct stackarg_info newinfo; + void *new_stack, *new_stack_base; + int i; + + if (find_vma_envelope_range( vma_list, VMA_STACK, + &stack_start, &stack_size ) < 0) return 0; + + policy = stackargs_get_remap_policy( &state->s, "WINEPRELOADREMAPSTACK", REMAP_POLICY_DEFAULT_STACK ); + if (policy == REMAP_POLICY_SKIP) goto remove_from_reserve; + if (policy != REMAP_POLICY_FORCE && + find_preload_reserved_area( (void *)stack_start, stack_size ) < 0) return 0; + + new_stack_base = wld_mmap( NULL, stack_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0 ); + if (new_stack_base == (void *)-1) goto remove_from_reserve; + + new_stack = (void *)((unsigned long)new_stack_base + ((unsigned long)state->s.stack - stack_start)); + copy_stackargs( &newinfo, &state->s, new_stack, (void *)((unsigned long)new_stack_base + stack_size) ); + + wld_memmove( &state->s, &newinfo, sizeof(state->s) ); + + free_vma_list( vma_list ); + alloc_scan_vma( vma_list, state->s.stack ); + return 1; + +remove_from_reserve: + while ((i = find_preload_reserved_area( (void *)stack_start, stack_size )) >= 0) + remove_preload_range( i ); + return -1; +} + +static void map_reserve_preload_ranges( const struct vma_area_list *vma_list ) { size_t i; - unsigned long exclude_start = (unsigned long)stackinfo->stack - 1; - unsigned long exclude_end = (unsigned long)stackinfo->auxv + 1;
for (i = 0; preload_info[i].size; i++) { - if (exclude_end > (unsigned long)preload_info[i].addr && - exclude_start <= (unsigned long)preload_info[i].addr + preload_info[i].size - 1) - { - remove_preload_range( i ); - i--; - } - else if (map_reserve_unmapped_range( vma_list, preload_info[i].addr, preload_info[i].size ) < 0) + if (map_reserve_unmapped_range( vma_list, preload_info[i].addr, preload_info[i].size ) < 0) { /* don't warn for low 64k */ if (preload_info[i].addr >= (void *)0x10000 @@ -2375,15 +2459,16 @@ void* wld_start( void **stack ) reserve = stackargs_getenv( &state.s, "WINEPRELOADRESERVE" ); if (reserve) preload_reserve( reserve );
- alloc_scan_vma( &vma_list ); - map_reserve_preload_ranges( &vma_list, &state.s ); + alloc_scan_vma( &vma_list, state.s.stack ); + map_reserve_preload_ranges( &vma_list );
remap_done = 0; remap_done |= remap_vdso( &vma_list, &state ) > 0; #ifdef __arm__ remap_done |= remap_sigpage( &vma_list, &state ) > 0; #endif - if (remap_done) map_reserve_preload_ranges( &vma_list, &state.s ); + remap_done |= remap_stack( &vma_list, &state ) > 0; + if (remap_done) map_reserve_preload_ranges( &vma_list );
/* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/loader/preloader.c b/loader/preloader.c index 69a14c27b91..775661a0735 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -1101,6 +1101,11 @@ static void dump_auxiliary( struct wld_auxv *av ) NAME(AT_SYSINFO), NAME(AT_SYSINFO_EHDR), NAME(AT_UID), + NAME(AT_SECURE), + NAME(AT_RANDOM), + NAME(AT_HWCAP2), + NAME(AT_EXECFN), + NAME(AT_MINSIGSTKSZ), { 0, NULL } }; #undef NAME
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 775661a0735..4cbc60ad499 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -246,11 +246,11 @@ enum remap_policy REMAP_POLICY_SKIP = 2, LAST_REMAP_POLICY,
- REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP, + REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_ON_CONFLICT, #ifdef __arm__ - REMAP_POLICY_DEFAULT_SIGPAGE = REMAP_POLICY_SKIP, + REMAP_POLICY_DEFAULT_SIGPAGE = REMAP_POLICY_ON_CONFLICT, #endif - REMAP_POLICY_DEFAULT_STACK = REMAP_POLICY_SKIP, + REMAP_POLICY_DEFAULT_STACK = REMAP_POLICY_ON_CONFLICT, };
struct remap_test_block {