Although int and long are effectively the same type on i386, declaring the syscall result as long makes it clear that a machine word-width value is being returned.
Also, future patches adding syscalls returning pointers can use the same type (long) for return value consistently.
While we're at it, slightly change the SYSCALL_RET function-like macro so that its error-range check references the parameter only once (a single unsigned comparison instead of two signed ones).
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com> --- loader/preloader.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 585be50624f..cb20afd4d5a 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -236,7 +236,7 @@ __ASM_GLOBAL_FUNC(_start,
/* wrappers for Linux system calls */
-#define SYSCALL_RET(ret) (((ret) < 0 && (ret) > -4096) ? -1 : (ret)) +#define SYSCALL_RET(ret) (((unsigned long)(ret) > -4096UL) ? -1 : (ret))
static inline __attribute__((noreturn)) void wld_exit( int code ) { @@ -247,7 +247,7 @@ static inline __attribute__((noreturn)) void wld_exit( int code )
static inline int wld_open( const char *name, int flags ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (5 /* SYS_open */), "r" (name), "c" (flags) ); return SYSCALL_RET(ret); @@ -255,7 +255,7 @@ static inline int wld_open( const char *name, int flags )
static inline int wld_close( int fd ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (6 /* SYS_close */), "r" (fd) ); return SYSCALL_RET(ret); @@ -263,7 +263,7 @@ static inline int wld_close( int fd )
static inline ssize_t wld_read( int fd, void *buffer, size_t len ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (3 /* SYS_read */), "r" (fd), "c" (buffer), "d" (len) @@ -273,7 +273,7 @@ static inline ssize_t wld_read( int fd, void *buffer, size_t len )
static inline ssize_t wld_write( int fd, const void *buffer, size_t len ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (4 /* SYS_write */), "r" (fd), "c" (buffer), "d" (len) ); return SYSCALL_RET(ret); @@ -281,7 +281,7 @@ static inline ssize_t wld_write( int fd, const void *buffer, size_t len )
static inline int wld_mprotect( const void *addr, size_t len, int prot ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (125 /* SYS_mprotect */), "r" (addr), "c" (len), "d" (prot) ); return SYSCALL_RET(ret); @@ -328,7 +328,7 @@ __ASM_GLOBAL_FUNC(wld_mmap,
static inline int wld_prctl( int code, long arg ) { - int ret; + long ret; __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" : "=a" (ret) : "0" (172 /* SYS_prctl */), "r" (code), "c" (arg) ); return SYSCALL_RET(ret);
Arbitrarily manipulating the stack pointer in inline assembly interferes with stack unwinding and debugging experience. Note that it's impossible to reliably specify unwinding information in inline assembly.
The workaround appears to be due to "buggy" register allocation that manifests in GCC <= 4.x when emitting position-independent code. This is not an issue, since the preloader isn't a position-independent executable or a shared library.
Fix this by getting rid of extra spilling and value transfer of the EBX register, and telling the compiler to allocate EBX directly.
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com> --- loader/preloader.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index cb20afd4d5a..540394eb932 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -241,32 +241,32 @@ __ASM_GLOBAL_FUNC(_start, static inline __attribute__((noreturn)) void wld_exit( int code ) { for (;;) /* avoid warning */ - __asm__ __volatile__( "pushl %%ebx; movl %1,%%ebx; int $0x80; popl %%ebx" - : : "a" (1 /* SYS_exit */), "r" (code) ); + __asm__ __volatile__( "int $0x80" + : : "a" (1 /* SYS_exit */), "b" (code) ); }
static inline int wld_open( const char *name, int flags ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (5 /* SYS_open */), "r" (name), "c" (flags) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (5 /* SYS_open */), "b" (name), "c" (flags) ); return SYSCALL_RET(ret); }
static inline int wld_close( int fd ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (6 /* SYS_close */), "r" (fd) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (6 /* SYS_close */), "b" (fd) ); return SYSCALL_RET(ret); }
static inline ssize_t wld_read( int fd, void *buffer, size_t len ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" + __asm__ __volatile__( "int $0x80" : "=a" (ret) - : "0" (3 /* SYS_read */), "r" (fd), "c" (buffer), "d" (len) + : "0" (3 /* SYS_read */), "b" (fd), "c" (buffer), "d" (len) : "memory" ); return SYSCALL_RET(ret); } @@ -274,16 +274,16 @@ static inline ssize_t wld_read( int fd, void *buffer, size_t len ) static inline ssize_t wld_write( int fd, const void *buffer, size_t len ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (4 /* SYS_write */), "r" (fd), "c" (buffer), "d" (len) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (4 /* SYS_write */), "b" (fd), "c" (buffer), "d" (len) ); return SYSCALL_RET(ret); }
static inline int wld_mprotect( const void *addr, size_t len, int prot ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (125 /* SYS_mprotect */), "r" (addr), "c" (len), "d" (prot) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (125 /* SYS_mprotect */), "b" (addr), "c" (len), "d" (prot) ); return SYSCALL_RET(ret); }
@@ -329,8 +329,8 @@ __ASM_GLOBAL_FUNC(wld_mmap, static inline int wld_prctl( int code, long arg ) { long ret; - __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx" - : "=a" (ret) : "0" (172 /* SYS_prctl */), "r" (code), "c" (arg) ); + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (172 /* SYS_prctl */), "b" (code), "c" (arg) ); return SYSCALL_RET(ret); }
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com> --- loader/preloader.c | 242 +++++++++++++++++++++++++++++++-------------- 1 file changed, 169 insertions(+), 73 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 540394eb932..0397a534f82 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -164,6 +164,21 @@ struct wld_auxv } a_un; };
+struct stackarg_info +{ + void *stack; + int argc; + char **argv; + char **envp; + struct wld_auxv *auxv; + struct wld_auxv *auxv_end; +}; + +struct preloader_state +{ + struct stackarg_info s; +}; + /* * The __bb_init_func is an empty function only called when file is * compiled with gcc flags "-fprofile-arcs -ftest-coverage". This @@ -674,6 +689,32 @@ static inline void *wld_memset( void *dest, int val, size_t len ) return dest; }
+static size_t wld_strlen( const char *str ) +{ + const char *ptr = str; + while (*ptr) ptr++; + return ptr - str; +} + +static inline void *wld_memmove( void *dest, const void *src, size_t len ) +{ + unsigned char *destp = dest; + const unsigned char *srcp = src; + + if ((unsigned long)dest - (unsigned long)src < len) + { + destp += len; + srcp += len; + while (len--) *--destp = *--srcp; + } + else + { + while (len--) *destp++ = *srcp++; + } + + return dest; +} + /* * wld_printf - just the basics * @@ -794,72 +835,138 @@ static void dump_auxiliary( struct wld_auxv *av ) } #endif
+static void parse_stackargs( struct stackarg_info *outinfo, void *stack ) +{ + int argc; + char **argv, **envp, **env_end; + struct wld_auxv *auxv, *auxv_end; + + argc = *(int *)stack; + argv = (char **)stack + 1; + envp = argv + (unsigned int)argc + 1; + + env_end = envp; + while (*env_end++) + ; + auxv = (struct wld_auxv *)env_end; + + auxv_end = auxv; + while ((auxv_end++)->a_type != AT_NULL) + ; + + outinfo->stack = stack; + outinfo->argc = argc; + outinfo->argv = argv; + outinfo->envp = envp; + outinfo->auxv = auxv; + outinfo->auxv_end = auxv_end; +} + +static char *stackargs_getenv( const struct stackarg_info *info, const char *name ) +{ + char **envp = info->envp; + size_t namelen = wld_strlen( name ); + + while (*envp) + { + if (wld_strncmp(*envp, name, namelen) == 0 && + (*envp)[namelen] == '=') return *envp + namelen + 1; + envp++; + } + return NULL; +} + +static void stackargs_eat_args( struct stackarg_info *info, int num_args ) +{ + info->stack = (void *)((unsigned long)info->stack + sizeof(char *) * num_args); + info->argc -= num_args; + info->argv = (char **)info->stack + 1; + *(int *)info->stack = info->argc; +} + +static void shift_stackargs( struct stackarg_info *newinfo, struct stackarg_info *oldinfo, void *newstack ) +{ + unsigned long offset = (unsigned long)newstack - (unsigned long)oldinfo->stack; + + /* NOTE it is legal that newinfo == oldinfo */ + newinfo->stack = newstack; + newinfo->argc = oldinfo->argc; + newinfo->argv = (void *)((unsigned long)oldinfo->argv + offset); + newinfo->envp = (void *)((unsigned long)oldinfo->envp + offset); + newinfo->auxv = (void *)((unsigned long)oldinfo->auxv + offset); + newinfo->auxv_end = (void *)((unsigned long)oldinfo->auxv_end + offset); +} + /* * set_auxiliary_values * * Set the new auxiliary values */ -static void set_auxiliary_values( struct wld_auxv *av, const struct wld_auxv *new_av, - const struct wld_auxv *delete_av, void **stack ) +static void set_auxiliary_values( struct preloader_state 
*state, + const struct wld_auxv *new_av, + const struct wld_auxv *delete_av ) { - int i, j, av_count = 0, new_count = 0, delete_count = 0; - char *src, *dst; - - /* count how many aux values we have already */ - while (av[av_count].a_type != AT_NULL) av_count++; + size_t i, new_count = 0, delete_count = 0; + unsigned long dst; + struct wld_auxv *avpd, *avps, *avp; + int is_deleted;
/* delete unwanted values */ - for (j = 0; delete_av[j].a_type != AT_NULL; j++) + for (avps = avpd = state->s.auxv; avps + 1 != state->s.auxv_end; avps++) { - for (i = 0; i < av_count; i++) if (av[i].a_type == delete_av[j].a_type) + is_deleted = 0; + for (i = 0; delete_av[i].a_type != AT_NULL; i++) + { + if (avps->a_type == delete_av[i].a_type) + { + is_deleted = 1; + break; + } + } + if (is_deleted) { - av[i].a_type = av[av_count-1].a_type; - av[i].a_un.a_val = av[av_count-1].a_un.a_val; - av[--av_count].a_type = AT_NULL; delete_count++; - break; + continue; } + if (avpd != avps) + { + avpd->a_type = avps->a_type; + avpd->a_un.a_val = avps->a_un.a_val; + } + avpd++; } + avpd->a_type = AT_NULL; + avpd->a_un.a_val = 0; + state->s.auxv_end = avpd + 1;
/* count how many values we have in new_av that aren't in av */ - for (j = 0; new_av[j].a_type != AT_NULL; j++) + for (i = 0; new_av[i].a_type != AT_NULL; i++) { - for (i = 0; i < av_count; i++) if (av[i].a_type == new_av[j].a_type) break; - if (i == av_count) new_count++; + for (avp = state->s.auxv; avp + 1 != state->s.auxv_end; avp++) if (avp->a_type == new_av[i].a_type) break; + if (avp + 1 == state->s.auxv_end) new_count++; }
- src = (char *)*stack; - dst = src - (new_count - delete_count) * sizeof(*av); - dst = (char *)((unsigned long)dst & ~15); - if (dst < src) /* need to make room for the extra values */ - { - int len = (char *)(av + av_count + 1) - src; - for (i = 0; i < len; i++) dst[i] = src[i]; - } - else if (dst > src) /* get rid of unused values */ - { - int len = (char *)(av + av_count + 1) - src; - for (i = len - 1; i >= 0; i--) dst[i] = src[i]; - } - *stack = dst; - av = (struct wld_auxv *)((char *)av + (dst - src)); + dst = ((unsigned long)state->s.stack - + (new_count - delete_count) * sizeof(struct wld_auxv)) & ~15; + wld_memmove( (void *)dst, state->s.stack, + (unsigned long)state->s.auxv_end - + (unsigned long)state->s.stack ); + shift_stackargs( &state->s, &state->s, (void *)dst );
/* now set the values */ - for (j = 0; new_av[j].a_type != AT_NULL; j++) + for (i = 0; new_av[i].a_type != AT_NULL; i++) { - for (i = 0; i < av_count; i++) if (av[i].a_type == new_av[j].a_type) break; - if (i < av_count) av[i].a_un.a_val = new_av[j].a_un.a_val; - else - { - av[av_count].a_type = new_av[j].a_type; - av[av_count].a_un.a_val = new_av[j].a_un.a_val; - av_count++; - } + for (avp = state->s.auxv; avp + 1 != state->s.auxv_end; avp++) if (avp->a_type == new_av[i].a_type) break; + avp->a_type = new_av[i].a_type; + avp->a_un.a_val = new_av[i].a_un.a_val; + if (avp + 1 == state->s.auxv_end) state->s.auxv_end++; } + state->s.auxv_end[-1].a_type = AT_NULL; + state->s.auxv_end[-1].a_un.a_val = 0;
#ifdef DUMP_AUX_INFO wld_printf("New auxiliary info:\n"); - dump_auxiliary( av ); + dump_auxiliary( state->s.auxv ); #endif }
@@ -1369,47 +1476,36 @@ static void set_process_name( int argc, char *argv[] ) */ void* wld_start( void **stack ) { - long i, *pargc; - char **argv, **p; - char *interp, *reserve = NULL; - struct wld_auxv new_av[8], delete_av[3], *av; + long i; + char *interp, *reserve; + struct wld_auxv new_av[8], delete_av[3]; struct wld_link_map main_binary_map, ld_so_map; struct wine_preload_info **wine_main_preload_info; + struct preloader_state state = { 0 };
- pargc = *stack; - argv = (char **)pargc + 1; - if (*pargc < 2) fatal_error( "Usage: %s wine_binary [args]\n", argv[0] ); + parse_stackargs( &state.s, *stack );
- /* skip over the parameters */ - p = argv + *pargc + 1; + if (state.s.argc < 2) fatal_error( "Usage: %s wine_binary [args]\n", state.s.argv[0] );
- /* skip over the environment */ - while (*p) - { - static const char res[] = "WINEPRELOADRESERVE="; - if (!wld_strncmp( *p, res, sizeof(res)-1 )) reserve = *p + sizeof(res) - 1; - p++; - } - - av = (struct wld_auxv *)(p+1); - page_size = get_auxiliary( av, AT_PAGESZ, 4096 ); + page_size = get_auxiliary( state.s.auxv, AT_PAGESZ, 4096 ); page_mask = page_size - 1;
preloader_start = (char *)_start - ((unsigned long)_start & page_mask); preloader_end = (char *)((unsigned long)(_end + page_mask) & ~page_mask);
#ifdef DUMP_AUX_INFO - wld_printf( "stack = %p\n", *stack ); - for( i = 0; i < *pargc; i++ ) wld_printf("argv[%lx] = %s\n", i, argv[i]); - dump_auxiliary( av ); + wld_printf( "stack = %p\n", state.s.stack ); + for( i = 0; i < state.s.argc; i++ ) wld_printf("argv[%lx] = %s\n", i, state.s.argv[i]); + dump_auxiliary( state.s.auxv ); #endif
/* reserve memory that Wine needs */ + reserve = stackargs_getenv( &state.s, "WINEPRELOADRESERVE" ); if (reserve) preload_reserve( reserve ); for (i = 0; preload_info[i].size; i++) { - if ((char *)av >= (char *)preload_info[i].addr && - (char *)pargc <= (char *)preload_info[i].addr + preload_info[i].size) + if ((char *)state.s.auxv >= (char *)preload_info[i].addr && + (char *)state.s.stack <= (char *)preload_info[i].addr + preload_info[i].size) { remove_preload_range( i ); i--; @@ -1436,7 +1532,7 @@ void* wld_start( void **stack ) wld_mprotect( (char *)0x80000000 - page_size, page_size, PROT_EXEC | PROT_READ );
/* load the main binary */ - map_so_lib( argv[1], &main_binary_map ); + map_so_lib( state.s.argv[1], &main_binary_map );
/* load the ELF interpreter */ interp = (char *)main_binary_map.l_addr + main_binary_map.l_interp; @@ -1453,14 +1549,14 @@ void* wld_start( void **stack ) SET_NEW_AV( 2, AT_PHNUM, main_binary_map.l_phnum ); SET_NEW_AV( 3, AT_PAGESZ, page_size ); SET_NEW_AV( 4, AT_BASE, ld_so_map.l_addr ); - SET_NEW_AV( 5, AT_FLAGS, get_auxiliary( av, AT_FLAGS, 0 ) ); + SET_NEW_AV( 5, AT_FLAGS, get_auxiliary( state.s.auxv, AT_FLAGS, 0 ) ); SET_NEW_AV( 6, AT_ENTRY, main_binary_map.l_entry ); SET_NEW_AV( 7, AT_NULL, 0 ); #undef SET_NEW_AV
i = 0; /* delete sysinfo values if addresses conflict */ - if (is_in_preload_range( av, AT_SYSINFO ) || is_in_preload_range( av, AT_SYSINFO_EHDR )) + if (is_in_preload_range( state.s.auxv, AT_SYSINFO ) || is_in_preload_range( state.s.auxv, AT_SYSINFO_EHDR )) { delete_av[i++].a_type = AT_SYSINFO; delete_av[i++].a_type = AT_SYSINFO_EHDR; @@ -1468,14 +1564,13 @@ void* wld_start( void **stack ) delete_av[i].a_type = AT_NULL;
/* get rid of first argument */ - set_process_name( *pargc, argv ); - pargc[1] = pargc[0] - 1; - *stack = pargc + 1; + set_process_name( state.s.argc, state.s.argv ); + stackargs_eat_args( &state.s, 1 );
- set_auxiliary_values( av, new_av, delete_av, stack ); + set_auxiliary_values( &state, new_av, delete_av );
#ifdef DUMP_AUX_INFO - wld_printf("new stack = %p\n", *stack); + wld_printf("new stack = %p\n", state.s.stack); wld_printf("jumping to %p\n", (void *)ld_so_map.l_entry); #endif #ifdef DUMP_MAPS @@ -1490,6 +1585,7 @@ void* wld_start( void **stack ) } #endif
+ *stack = state.s.stack; return (void *)ld_so_map.l_entry; }
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com> --- loader/preloader.c | 56 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 17 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 0397a534f82..bf249b35f4d 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -68,6 +68,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <limits.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -715,6 +716,34 @@ static inline void *wld_memmove( void *dest, const void *src, size_t len ) return dest; }
+static inline unsigned long parse_ul( const char *nptr, char **endptr, unsigned int radix, int *overflow ) +{ + const char *p = nptr; + unsigned long value, thresh; + int ovfl = 0; + + value = 0; + thresh = ULONG_MAX / radix; + for (;;) + { + unsigned int digit; + if (*p >= '0' && *p <= '9') digit = *p - '0'; + else if (*p >= 'a' && *p <= 'z') digit = *p - 'a' + 10; + else if (*p >= 'A' && *p <= 'Z') digit = *p - 'A' + 10; + else break; + if (digit >= radix) break; + if (value > thresh) ovfl = 1; + value *= radix; + if (value > value + digit) ovfl = 1; + value += digit; + p++; + } + + if (endptr) *endptr = (char *)p; + if (overflow) *overflow = ovfl; + return value; +} + /* * wld_printf - just the basics * @@ -1350,29 +1379,22 @@ found: * * Reserve a range specified in string format */ -static void preload_reserve( const char *str ) +static void preload_reserve( char *str ) { - const char *p; + char *p = str; unsigned long result = 0; void *start = NULL, *end = NULL; - int i, first = 1; + int i;
- for (p = str; *p; p++) + result = parse_ul( p, &p, 16, NULL ); + if (*p == '-') { - if (*p >= '0' && *p <= '9') result = result * 16 + *p - '0'; - else if (*p >= 'a' && *p <= 'f') result = result * 16 + *p - 'a' + 10; - else if (*p >= 'A' && *p <= 'F') result = result * 16 + *p - 'A' + 10; - else if (*p == '-') - { - if (!first) goto error; - start = (void *)(result & ~page_mask); - result = 0; - first = 0; - } - else goto error; + start = (void *)(result & ~page_mask); + result = parse_ul( p + 1, &p, 16, NULL ); + if (*p) goto error; + end = (void *)((result + page_mask) & ~page_mask); } - if (!first) end = (void *)((result + page_mask) & ~page_mask); - else if (result) goto error; /* single value '0' is allowed */ + else if (*p || result) goto error; /* single value '0' is allowed */
/* sanity checks */ if (end <= start) start = end = NULL;
Rename is_addr_reserved to find_preload_reserved_area, with the following changes:
- Accept a second argument, "size", which specifies the size of the address range to test.
- Return the index of the matching entry, or -1 if none is found.
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com> --- loader/preloader.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index bf249b35f4d..713bb305e72 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -1429,17 +1429,19 @@ error: }
/* check if address is in one of the reserved ranges */ -static int is_addr_reserved( const void *addr ) +static int find_preload_reserved_area( const void *addr, size_t size ) { + unsigned long start = (unsigned long)addr; + unsigned long end = (unsigned long)addr + size; int i;
for (i = 0; preload_info[i].size; i++) { - if ((const char *)addr >= (const char *)preload_info[i].addr && - (const char *)addr < (const char *)preload_info[i].addr + preload_info[i].size) - return 1; + if (end > (unsigned long)preload_info[i].addr && + start < (unsigned long)preload_info[i].addr + preload_info[i].size) + return i; } - return 0; + return -1; }
/* remove a range from the preload list */ @@ -1462,7 +1464,7 @@ static int is_in_preload_range( const struct wld_auxv *av, int type ) { while (av->a_type != AT_NULL) { - if (av->a_type == type) return is_addr_reserved( (const void *)av->a_un.a_val ); + if (av->a_type == type) return find_preload_reserved_area( (const void *)av->a_un.a_val, 1 ) >= 0; av++; } return 0; @@ -1550,7 +1552,7 @@ void* wld_start( void **stack )
/* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */ - if (is_addr_reserved( (char *)0x80000000 - page_size )) + if (find_preload_reserved_area( (char *)0x80000000 - page_size, page_size ) >= 0) wld_mprotect( (char *)0x80000000 - page_size, page_size, PROT_EXEC | PROT_READ );
/* load the main binary */
Today, the preloader reserves some predefined address ranges without checking if there are any overlapping virtual memory mappings.
One side effect of this behaviour is that the preloader's ELF EHDR gets unmapped. Note the following overlapping address ranges:
- 0x00110000 - 0x68000000: low memory area (preload_info)
- 0x08040000 - 0x08041000: preloader ELF EHDR (x86)
- 0x00400000 - 0x00401000: preloader ELF EHDR (AMD64)
In practice, unmapping the preloader ELF EHDR is harmless; this is because the dynamic linker does not recognise the preloader binary.
Make the unmapping behaviour explicit by calling munmap() on the preloader's ELF EHDR.
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com> --- loader/preloader.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/loader/preloader.c b/loader/preloader.c index 713bb305e72..967c8e18c7f 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -223,6 +223,7 @@ struct * then jumps to the address wld_start returns. */ void _start(void); +extern char __executable_start[]; extern char _end[]; __ASM_GLOBAL_FUNC(_start, __ASM_CFI("\t.cfi_undefined %eip\n") @@ -342,6 +343,14 @@ __ASM_GLOBAL_FUNC(wld_mmap, __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") "\tret\n" )
+static inline int wld_munmap( void *addr, size_t len ) +{ + long ret; + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (91 /* SYS_munmap */), "b" (addr), "c" (len) ); + return SYSCALL_RET(ret); +} + static inline int wld_prctl( int code, long arg ) { long ret; @@ -361,6 +370,7 @@ void *thread_data[256]; * then jumps to the address wld_start returns. */ void _start(void); +extern char __executable_start[]; extern char _end[]; __ASM_GLOBAL_FUNC(_start, __ASM_CFI(".cfi_undefined %rip\n\t") @@ -424,6 +434,9 @@ SYSCALL_FUNC( wld_mmap, 9 /* SYS_mmap */ ); int wld_mprotect( const void *addr, size_t len, int prot ); SYSCALL_FUNC( wld_mprotect, 10 /* SYS_mprotect */ );
+int wld_munmap( void *addr, size_t len ); +SYSCALL_FUNC( wld_munmap, 11 /* SYS_munmap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 157 /* SYS_prctl */ );
@@ -450,6 +463,7 @@ void *thread_data[256]; * then jumps to the address wld_start returns. */ void _start(void); +extern char __executable_start[]; extern char _end[]; __ASM_GLOBAL_FUNC(_start, "mov x0, SP\n\t" @@ -530,6 +544,9 @@ SYSCALL_FUNC( wld_mmap, 222 /* SYS_mmap */ ); int wld_mprotect( const void *addr, size_t len, int prot ); SYSCALL_FUNC( wld_mprotect, 226 /* SYS_mprotect */ );
+int wld_munmap( void *addr, size_t len ); +SYSCALL_FUNC( wld_munmap, 215 /* SYS_munmap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 167 /* SYS_prctl */ );
@@ -556,6 +573,7 @@ void *thread_data[256]; * then jumps to the address wld_start returns. */ void _start(void); +extern char __executable_start[]; extern char _end[]; __ASM_GLOBAL_FUNC(_start, "mov r0, sp\n\t" @@ -628,6 +646,9 @@ void *wld_mmap( void *start, size_t len, int prot, int flags, int fd, off_t offs int wld_mprotect( const void *addr, size_t len, int prot ); SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ );
+int wld_munmap( void *addr, size_t len ); +SYSCALL_FUNC( wld_munmap, 91 /* SYS_munmap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ );
@@ -1517,6 +1538,13 @@ void* wld_start( void **stack ) preloader_start = (char *)_start - ((unsigned long)_start & page_mask); preloader_end = (char *)((unsigned long)(_end + page_mask) & ~page_mask);
+ if ((unsigned long)preloader_start > (unsigned long)__executable_start) + { + /* Unmap preloader's ELF EHDR; the length is kept page-aligned because munmap() rounds it up */ + wld_munmap( __executable_start, + (unsigned long)preloader_start - (unsigned long)__executable_start); + } + #ifdef DUMP_AUX_INFO wld_printf( "stack = %p\n", state.s.stack ); for( i = 0; i < state.s.argc; i++ ) wld_printf("argv[%lx] = %s\n", i, state.s.argv[i]);
Today, the preloader makes no attempt to avoid unmapping existing memory mappings except the initial stack. This results in irrevocably unmapping some useful preallocated memory areas, such as vDSO.
Fix this by reading /proc/self/maps for existing VMAs, and splitting mmap() calls to avoid erasing existing memory mappings.
Note that MAP_FIXED_NOREPLACE is not suitable for this kind of job: it fails entirely if there exist *any* overlapping memory mappings.
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com> --- loader/preloader.c | 328 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 305 insertions(+), 23 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 967c8e18c7f..8e6b5768e10 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -180,6 +180,31 @@ struct preloader_state struct stackarg_info s; };
+struct linebuffer +{ + char *base; + char *limit; + char *head; + char *tail; + int overflow; +}; + +struct vma_area +{ + unsigned long start; + unsigned long end; +}; + +struct vma_area_list +{ + struct vma_area *base; + struct vma_area *list_end; + struct vma_area *alloc_end; +}; + +#define FOREACH_VMA(list, item) \ + for ((item) = (list)->base; (item) != (list)->list_end; (item)++) + /* * The __bb_init_func is an empty function only called when file is * compiled with gcc flags "-fprofile-arcs -ftest-coverage". This @@ -737,6 +762,17 @@ static inline void *wld_memmove( void *dest, const void *src, size_t len ) return dest; }
+static inline void *wld_memchr( const void *mem, int val, size_t len ) +{ + const unsigned char *ptr = mem, *end = (const unsigned char *)ptr + len; + + for (ptr = mem; ptr != end; ptr++) + if (*ptr == (unsigned char)val) + return (void *)ptr; + + return NULL; +} + static inline unsigned long parse_ul( const char *nptr, char **endptr, unsigned int radix, int *overflow ) { const char *p = nptr; @@ -1511,6 +1547,269 @@ static void set_process_name( int argc, char *argv[] ) for (i = 1; i < argc; i++) argv[i] -= off; }
+static void linebuffer_init( struct linebuffer *lbuf, char *base, size_t len ) +{ + lbuf->base = base; + lbuf->limit = base + (len - 1); + lbuf->head = base; + lbuf->tail = base; + lbuf->overflow = 0; +} + +static char *linebuffer_getline( struct linebuffer *lbuf, char delim ) +{ + char *lnp, *line; + + while ((lnp = wld_memchr( lbuf->tail, delim, lbuf->head - lbuf->tail ))) + { + line = lbuf->tail; + lbuf->tail = lnp + 1; + if (!lbuf->overflow) + { + *lnp = '\0'; + return line; + } + lbuf->overflow = 0; + } + + if (lbuf->base == lbuf->tail) + { + if (lbuf->head == lbuf->limit) + { + line = lbuf->tail; + lbuf->tail = lbuf->head; + lbuf->overflow = 1; + *lbuf->head = '\0'; + return line; + } + } + else wld_memmove( lbuf->base, lbuf->tail, lbuf->head - lbuf->tail); + lbuf->head -= lbuf->tail - lbuf->base; + lbuf->tail = lbuf->base; + + return NULL; +} + +static int parse_maps_line( struct vma_area *entry, char *line ) +{ + struct vma_area item = { 0 }; + char *ptr = line; + int overflow; + + item.start = parse_ul( ptr, &ptr, 16, &overflow ); + if (overflow) return -1; + if (*ptr != '-') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + item.end = parse_ul( ptr, &ptr, 16, &overflow ); + if (overflow) item.end = ULONG_MAX; + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + if (item.start >= item.end) return -1; + + if (*ptr != 'r' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + if (*ptr != 'w' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + if (*ptr != 'x' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + if (*ptr != 's' && *ptr != 'p') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + parse_ul( ptr, &ptr, 16, NULL ); + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + parse_ul( ptr, &ptr, 16, NULL ); + if (*ptr != 
':') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + parse_ul( ptr, &ptr, 16, NULL ); + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + parse_ul( ptr, &ptr, 16, NULL ); + if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); + ptr++; + + *entry = item; + return 0; +} + +static struct vma_area *lookup_vma_entry( const struct vma_area_list *list, unsigned long address ) +{ + const struct vma_area *left = list->base, *right = list->list_end, *mid; + while (left < right) + { + mid = left + (right - left) / 2; + if (mid->end < address) left = mid + 1; + else right = mid; + } + return (struct vma_area *)left; +} + +static int map_reserve_range( void *addr, size_t size ) +{ + if (addr == (void *)-1 || + wld_mmap( addr, size, PROT_NONE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0) != addr) + return -1; + return 0; +} + +static int map_reserve_unmapped_range( const struct vma_area_list *list, void *addr, size_t size ) +{ + unsigned long range_start = (unsigned long)addr, + range_end = (unsigned long)addr + size; + const struct vma_area *start, *item; + unsigned long last_addr = range_start; + + start = lookup_vma_entry( list, range_start ); + for (item = start; item != list->list_end && item->start < range_end; item++) + { + if (item->start > last_addr && + map_reserve_range( (void *)last_addr, item->start - last_addr ) < 0) + goto fail; + last_addr = item->end; + } + + if (range_end > last_addr && + map_reserve_range( (void *)last_addr, range_end - last_addr ) < 0) + goto fail; + return 0; + +fail: + while (item != start) + { + item--; + last_addr = item == start ? 
range_start : item[-1].end; + if (item->start > last_addr) + wld_munmap( (void *)last_addr, item->start - last_addr ); + } + return -1; +} + +static void insert_vma_entry( struct vma_area_list *list, const struct vma_area *item ) +{ + struct vma_area *left = list->base, *right, *mid; + + if (list->base < list->list_end) + { + right = list->list_end; + mid = right - 1; /* start search from end */ + do + { + if (mid->start < item->start) left = mid + 1; + else right = mid; + mid = left + (right - left) / 2; + } while (mid < right); + } + wld_memmove(left + 1, left, (list->list_end - left) * sizeof(*left)); + wld_memmove(left, item, sizeof(*item)); + list->list_end++; + return; +} + +static void scan_vma( struct vma_area_list *list, size_t *act_count ) +{ + int fd; + size_t n = 0; + ssize_t nread; + struct linebuffer lbuf; + char buffer[80 + PATH_MAX], *line; + struct vma_area item; + + fd = wld_open( "/proc/self/maps", O_RDONLY ); + if (fd == -1) fatal_error( "could not open /proc/self/maps\n" ); + + linebuffer_init(&lbuf, buffer, sizeof(buffer)); + for (;;) + { + nread = wld_read( fd, lbuf.head, lbuf.limit - lbuf.head ); + if (nread < 0) fatal_error( "could not read /proc/self/maps\n" ); + if (nread == 0) break; + lbuf.head += nread; + + while ((line = linebuffer_getline( &lbuf, '\n' ))) + { + if (parse_maps_line( &item, line ) >= 0) + { + if (list->list_end < list->alloc_end) insert_vma_entry( list, &item ); + n++; + } + } + } + + wld_close(fd); + *act_count = n; +} + +static void free_vma_list( struct vma_area_list *list ) +{ + if (list->base) + wld_munmap( list->base, + (unsigned char *)list->alloc_end - (unsigned char *)list->base ); + list->base = NULL; + list->list_end = NULL; + list->alloc_end = 0; +} + +static void alloc_scan_vma( struct vma_area_list *listp ) +{ + size_t max_count = 4096 / sizeof(struct vma_area); + struct vma_area_list vma_list; + + for (;;) + { + vma_list.base = wld_mmap( NULL, sizeof(struct vma_area) * max_count, + PROT_READ | PROT_WRITE, MAP_PRIVATE | 
MAP_ANONYMOUS, + -1, 0 ); + if (vma_list.base == (struct vma_area *)MAP_FAILED) + fatal_error( "could not allocate memory for VMA list\n"); + vma_list.list_end = vma_list.base; + vma_list.alloc_end = vma_list.base + max_count; + + scan_vma( &vma_list, &max_count ); + if (vma_list.list_end - vma_list.base == max_count) + { + wld_memmove(listp, &vma_list, sizeof(*listp)); + break; + } + + free_vma_list( &vma_list ); + } +} + +static void map_reserve_preload_ranges( struct vma_area_list *vma_list, void *exclude_start, void *exclude_end ) +{ + size_t i; + for (i = 0; preload_info[i].size; i++) + { + if ((char *)exclude_end >= (char *)preload_info[i].addr && + (char *)exclude_start <= (char *)preload_info[i].addr + preload_info[i].size) + { + remove_preload_range( i ); + i--; + } + else if (map_reserve_unmapped_range( vma_list, preload_info[i].addr, preload_info[i].size ) < 0) + { + /* don't warn for low 64k */ + if (preload_info[i].addr >= (void *)0x10000 +#ifdef __aarch64__ + && preload_info[i].addr < (void *)0x7fffffffff /* ARM64 address space might end here*/ +#endif + ) + wld_printf( "preloader: Warning: failed to reserve range %p-%p\n", + preload_info[i].addr, (char *)preload_info[i].addr + preload_info[i].size ); + remove_preload_range( i ); + i--; + } + } +} +
/* * wld_start @@ -1527,6 +1826,7 @@ void* wld_start( void **stack ) struct wld_link_map main_binary_map, ld_so_map; struct wine_preload_info **wine_main_preload_info; struct preloader_state state = { 0 }; + struct vma_area_list vma_list = { NULL };
parse_stackargs( &state.s, *stack );
@@ -1554,29 +1854,9 @@ void* wld_start( void **stack ) /* reserve memory that Wine needs */ reserve = stackargs_getenv( &state.s, "WINEPRELOADRESERVE" ); if (reserve) preload_reserve( reserve ); - for (i = 0; preload_info[i].size; i++) - { - if ((char *)state.s.auxv >= (char *)preload_info[i].addr && - (char *)state.s.stack <= (char *)preload_info[i].addr + preload_info[i].size) - { - remove_preload_range( i ); - i--; - } - else if (wld_mmap( preload_info[i].addr, preload_info[i].size, PROT_NONE, - MAP_FIXED | MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0 ) == (void *)-1) - { - /* don't warn for low 64k */ - if (preload_info[i].addr >= (void *)0x10000 -#ifdef __aarch64__ - && preload_info[i].addr < (void *)0x7fffffffff /* ARM64 address space might end here*/ -#endif - ) - wld_printf( "preloader: Warning: failed to reserve range %p-%p\n", - preload_info[i].addr, (char *)preload_info[i].addr + preload_info[i].size ); - remove_preload_range( i ); - i--; - } - } + + alloc_scan_vma( &vma_list ); + map_reserve_preload_ranges( &vma_list, state.s.stack, state.s.auxv );
/* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */ @@ -1637,6 +1917,8 @@ void* wld_start( void **stack ) } #endif
+ free_vma_list( &vma_list ); + *stack = state.s.stack; return (void *)ld_so_map.l_entry; }
Today, the preloader removes the vDSO entries (AT_SYSINFO*) from the auxiliary vector when the vDSO conflicts with one of the predefined reserved address ranges.
The vDSO is a shared object provided by the kernel. Among other things, it provides a mechanism to issue certain system calls without the overhead of switching to kernel mode.
Without vDSO, libc still works; however, it is expected that some system call functions (e.g. gettimeofday, clock_gettime) will show degraded performance.
Fix this by relocating vDSO to another address (if supported by the kernel) instead of erasing it from auxv entirely.
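This behavior is opt-in: it is enabled only when the "WINEPRELOADREMAPVDSO" environment variable is set to "on-conflict" (relocate only when the vDSO overlaps a reserved range) or "force" (always relocate). The default is "skip", which keeps the existing behavior of removing the conflicting entries from auxv. In the future, "on-conflict" could become the default.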
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 177 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 3 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index 8e6b5768e10..b13ba1dac57 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -102,6 +102,9 @@ #ifndef MAP_NORESERVE #define MAP_NORESERVE 0 #endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif
static struct wine_preload_info preload_info[] = { @@ -189,10 +192,19 @@ struct linebuffer int overflow; };
+enum vma_type_flags +{ + VMA_NORMAL = 0x01, + VMA_VDSO = 0x02, + VMA_VVAR = 0x04, +}; + struct vma_area { unsigned long start; unsigned long end; + unsigned char type_flags; + unsigned char moved; };
struct vma_area_list @@ -205,6 +217,16 @@ struct vma_area_list #define FOREACH_VMA(list, item) \ for ((item) = (list)->base; (item) != (list)->list_end; (item)++)
+enum remap_policy +{ + REMAP_POLICY_ON_CONFLICT = 0, + REMAP_POLICY_FORCE = 1, + REMAP_POLICY_SKIP = 2, + LAST_REMAP_POLICY, + + REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP, +}; + /* * The __bb_init_func is an empty function only called when file is * compiled with gcc flags "-fprofile-arcs -ftest-coverage". This @@ -376,6 +398,16 @@ static inline int wld_munmap( void *addr, size_t len ) return SYSCALL_RET(ret); }
+static inline void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ) +{ + long ret; + __asm__ __volatile__( "int $0x80" + : "=a" (ret) : "0" (163 /* SYS_mremap */), "b" (old_addr), "c" (old_len), + "d" (new_size), "S" (flags), "D" (new_addr) + : "memory" ); + return (void *)SYSCALL_RET(ret); +} + static inline int wld_prctl( int code, long arg ) { long ret; @@ -462,6 +494,9 @@ SYSCALL_FUNC( wld_mprotect, 10 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 11 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 25 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 157 /* SYS_prctl */ );
@@ -572,6 +607,9 @@ SYSCALL_FUNC( wld_mprotect, 226 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 215 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 216 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 167 /* SYS_prctl */ );
@@ -674,6 +712,9 @@ SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ ); int wld_munmap( void *addr, size_t len ); SYSCALL_FUNC( wld_munmap, 91 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr ); +SYSCALL_FUNC( wld_mremap, 163 /* SYS_mremap */ ); + int wld_prctl( int code, long arg ); SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ );
@@ -1593,6 +1634,7 @@ static char *linebuffer_getline( struct linebuffer *lbuf, char delim ) static int parse_maps_line( struct vma_area *entry, char *line ) { struct vma_area item = { 0 }; + unsigned long dev_maj, dev_min; char *ptr = line; int overflow;
@@ -1623,11 +1665,11 @@ static int parse_maps_line( struct vma_area *entry, char *line ) if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
- parse_ul( ptr, &ptr, 16, NULL ); + dev_maj = parse_ul( ptr, &ptr, 16, NULL ); if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
- parse_ul( ptr, &ptr, 16, NULL ); + dev_min = parse_ul( ptr, &ptr, 16, NULL ); if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
@@ -1635,6 +1677,17 @@ static int parse_maps_line( struct vma_area *entry, char *line ) if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" ); ptr++;
+ while (*ptr == ' ') + ptr++; + + if (dev_maj == 0 && dev_min == 0) + { + if (wld_strcmp(ptr, "[vdso]") == 0) + item.type_flags |= VMA_VDSO; + else if (wld_strcmp(ptr, "[vvar]") == 0) + item.type_flags |= VMA_VVAR; + } + *entry = item; return 0; } @@ -1713,6 +1766,51 @@ static void insert_vma_entry( struct vma_area_list *list, const struct vma_area return; }
+static int find_vma_envelope_range( const struct vma_area_list *list, int type_mask, unsigned long *startp, unsigned long *sizep ) +{ + const struct vma_area *item; + unsigned long start = ULONG_MAX; + unsigned long end = 0; + + FOREACH_VMA(list, item) + { + if (item->type_flags & type_mask) + { + if (start > item->start) start = item->start; + if (end < item->end) end = item->end; + } + } + + if (start >= end) return -1; + + *startp = start; + *sizep = end - start; + return 0; +} + +static int remap_multiple_vmas( struct vma_area_list *list, unsigned long offset, int type_mask, unsigned char revert ) +{ + struct vma_area *item; + void *expect_addr, *mapped_addr; + size_t size; + + FOREACH_VMA(list, item) + { + if ((item->type_flags & type_mask) && item->moved == revert) + { + expect_addr = (void *)(item->start + offset); + size = item->end - item->start; + mapped_addr = wld_mremap( (void *)item->start, size, size, + MREMAP_FIXED | MREMAP_MAYMOVE, expect_addr ); + if (mapped_addr == (void *)-1) return -1; + if (mapped_addr != (void *)item->start) item->moved = !revert; + if (mapped_addr != expect_addr) return -1; + } + } + + return 0; +} + static void scan_vma( struct vma_area_list *list, size_t *act_count ) { int fd; @@ -1783,6 +1881,77 @@ static void alloc_scan_vma( struct vma_area_list *listp ) } }
+static enum remap_policy stackargs_get_remap_policy( const struct stackarg_info *info, const char *name, + enum remap_policy default_policy ) +{ + char *valstr = stackargs_getenv( info, name ), *endptr; + unsigned long valnum; + + if (valstr) { + if (wld_strcmp(valstr, "auto") == 0 || wld_strcmp(valstr, "on-conflict") == 0) + return REMAP_POLICY_ON_CONFLICT; + if (wld_strcmp(valstr, "always") == 0 || wld_strcmp(valstr, "force") == 0) + return REMAP_POLICY_FORCE; + if (wld_strcmp(valstr, "never") == 0 || wld_strcmp(valstr, "skip") == 0) + return REMAP_POLICY_SKIP; + valnum = parse_ul( valstr, &endptr, 10, NULL ); + if (!*endptr && valnum < LAST_REMAP_POLICY) return valnum; + } + + return default_policy; +} + +static int remap_vdso( struct vma_area_list *vma_list, struct preloader_state *state ) +{ + enum remap_policy policy; + unsigned long vdso_start, vdso_size, offset; + unsigned long new_vdso_start, new_vdso_size; + void *new_vdso; + struct wld_auxv *auxv; + + if (find_vma_envelope_range( vma_list, VMA_VDSO | VMA_VVAR, + &vdso_start, &vdso_size ) < 0) return 0; + + policy = stackargs_get_remap_policy( &state->s, "WINEPRELOADREMAPVDSO", REMAP_POLICY_DEFAULT_VDSO ); + if (policy == REMAP_POLICY_SKIP) return -1; + if (policy != REMAP_POLICY_FORCE && + find_preload_reserved_area( (void *)vdso_start, vdso_size ) < 0) return 0; + + new_vdso = wld_mmap( NULL, vdso_size, PROT_NONE, + MAP_GROWSDOWN | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0 ); + if (new_vdso == (void *)-1) return -1; + + offset = (unsigned long)new_vdso - vdso_start; + if (remap_multiple_vmas( vma_list, offset, VMA_VVAR, 0 ) < 0 || + remap_multiple_vmas( vma_list, offset, VMA_VDSO, 0 ) < 0) goto remap_restore; + + free_vma_list( vma_list ); + alloc_scan_vma( vma_list ); + + if (find_vma_envelope_range( vma_list, VMA_VDSO | VMA_VVAR, &new_vdso_start, &new_vdso_size ) < 0 || + vdso_start + offset != new_vdso_start || + vdso_size != new_vdso_size) goto remap_restore; + + for (auxv = state->s.auxv; 
auxv->a_type != AT_NULL; auxv++) + { + switch (auxv->a_type) + { + case AT_SYSINFO: + case AT_SYSINFO_EHDR: + auxv->a_un.a_val += offset; + break; + } + } + + return 1; + +remap_restore: + if (remap_multiple_vmas( vma_list, -offset, -1, 1 ) < 0) + fatal_error( "Cannot restore VDSO VMAs\n" ); + + return -1; +} + static void map_reserve_preload_ranges( struct vma_area_list *vma_list, void *exclude_start, void *exclude_end ) { size_t i; @@ -1858,6 +2027,8 @@ void* wld_start( void **stack ) alloc_scan_vma( &vma_list ); map_reserve_preload_ranges( &vma_list, state.s.stack, state.s.auxv );
+ if (remap_vdso( &vma_list, &state ) > 0) map_reserve_preload_ranges( &vma_list, state.s.stack, state.s.auxv ); + /* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */ if (find_preload_reserved_area( (char *)0x80000000 - page_size, page_size ) >= 0) @@ -1887,7 +2058,7 @@ void* wld_start( void **stack ) #undef SET_NEW_AV
i = 0; - /* delete sysinfo values if addresses conflict */ + /* delete sysinfo values if addresses conflict and remap failed */ if (is_in_preload_range( state.s.auxv, AT_SYSINFO ) || is_in_preload_range( state.s.auxv, AT_SYSINFO_EHDR )) { delete_av[i++].a_type = AT_SYSINFO;
Today, the preloader abandons reserved address ranges that conflict with the call stack area.
Fix this by attempting to copy the stack somewhere else, and switching to it before entering the ld.so entry point. This way, the preloader does not have to give up the address reservation.
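This behavior is opt-in: it is enabled only when the "WINEPRELOADREMAPSTACK" environment variable is set to "on-conflict" (relocate only when the stack overlaps a reserved range) or "force" (always relocate). The default is "skip", which keeps the existing behavior of dropping the conflicting reservations. In the future, "on-conflict" could become the default.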
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 123 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 107 insertions(+), 16 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c index b13ba1dac57..6c4c4c806bb 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -197,6 +197,7 @@ enum vma_type_flags VMA_NORMAL = 0x01, VMA_VDSO = 0x02, VMA_VVAR = 0x04, + VMA_STACK = 0x08, };
struct vma_area @@ -224,7 +225,8 @@ enum remap_policy REMAP_POLICY_SKIP = 2, LAST_REMAP_POLICY,
- REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP, + REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP, + REMAP_POLICY_DEFAULT_STACK = REMAP_POLICY_SKIP, };
/* @@ -1024,6 +1026,59 @@ static void shift_stackargs( struct stackarg_info *newinfo, struct stackarg_info newinfo->auxv_end = (void *)((unsigned long)oldinfo->auxv_end + offset); }
+static size_t relocate_argvec( char **dest, char **src, size_t count ) +{ + size_t i; + unsigned long offset = (unsigned long)dest - (unsigned long)src; + + for (i = 0; i < count && src[i]; i++) + dest[i] = src[i] + offset; + + dest[i] = 0; + return i; +} + +static size_t relocate_auxvec( struct wld_auxv *dest, struct wld_auxv *src ) +{ + size_t i; + unsigned long offset = (unsigned long)dest - (unsigned long)src; + + for (i = 0; src[i].a_type != AT_NULL; i++) + { + dest[i].a_type = src[i].a_type; + switch (dest[i].a_type) + { + case AT_RANDOM: + case AT_PLATFORM: + case AT_BASE_PLATFORM: + case AT_EXECFN: + if (src[i].a_un.a_val >= (unsigned long)src) + { + dest[i].a_un.a_val = src[i].a_un.a_val + offset; + break; + } + /* fallthrough */ + default: + dest[i].a_un.a_val = src[i].a_un.a_val; + break; + } + } + + return i; +} + +static void copy_stackargs( struct stackarg_info *newinfo, struct stackarg_info *oldinfo, void *newstack, void *newstackend ) +{ + shift_stackargs( newinfo, oldinfo, newstack ); + + *(int *)newstack = *(int *)oldinfo->stack; + relocate_argvec( newinfo->argv, oldinfo->argv, newinfo->envp - newinfo->argv ); + relocate_argvec( newinfo->envp, oldinfo->envp, (char **)newinfo->auxv - newinfo->envp ); + relocate_auxvec( newinfo->auxv, oldinfo->auxv ); + wld_memmove( newinfo->auxv_end, oldinfo->auxv_end, + (unsigned long)newstackend - (unsigned long)newinfo->auxv_end ); +} + /* * set_auxiliary_values * @@ -1811,7 +1866,7 @@ static int remap_multiple_vmas( struct vma_area_list *list, unsigned long offset return 0; }
-static void scan_vma( struct vma_area_list *list, size_t *act_count ) +static void scan_vma( struct vma_area_list *list, size_t *act_count, void *stack_ptr ) { int fd; size_t n = 0; @@ -1835,6 +1890,9 @@ static void scan_vma( struct vma_area_list *list, size_t *act_count ) { if (parse_maps_line( &item, line ) >= 0) { + if (item.start <= (unsigned long)stack_ptr && + item.end > (unsigned long)stack_ptr) + item.type_flags |= VMA_STACK; if (list->list_end < list->alloc_end) insert_vma_entry( list, &item ); n++; } @@ -1855,7 +1913,7 @@ static void free_vma_list( struct vma_area_list *list ) list->alloc_end = 0; }
-static void alloc_scan_vma( struct vma_area_list *listp ) +static void alloc_scan_vma( struct vma_area_list *listp, void *stack_ptr ) { size_t max_count = 4096 / sizeof(struct vma_area); struct vma_area_list vma_list; @@ -1870,7 +1928,7 @@ static void alloc_scan_vma( struct vma_area_list *listp ) vma_list.list_end = vma_list.base; vma_list.alloc_end = vma_list.base + max_count;
- scan_vma( &vma_list, &max_count ); + scan_vma( &vma_list, &max_count, stack_ptr ); if (vma_list.list_end - vma_list.base == max_count) { wld_memmove(listp, &vma_list, sizeof(*listp)); @@ -1926,7 +1984,7 @@ static int remap_vdso( struct vma_area_list *vma_list, struct preloader_state *s remap_multiple_vmas( vma_list, offset, VMA_VDSO, 0 ) < 0) goto remap_restore;
free_vma_list( vma_list ); - alloc_scan_vma( vma_list ); + alloc_scan_vma( vma_list, state->s.stack );
if (find_vma_envelope_range( vma_list, VMA_VDSO | VMA_VVAR, &new_vdso_start, &new_vdso_size ) < 0 || vdso_start + offset != new_vdso_start || @@ -1952,18 +2010,47 @@ remap_restore: return -1; }
-static void map_reserve_preload_ranges( struct vma_area_list *vma_list, void *exclude_start, void *exclude_end ) +static int remap_stack( struct vma_area_list *vma_list, struct preloader_state *state ) +{ + enum remap_policy policy; + unsigned long stack_start, stack_size; + struct stackarg_info newinfo; + void *new_stack, *new_stack_base; + int i; + + if (find_vma_envelope_range( vma_list, VMA_STACK, + &stack_start, &stack_size ) < 0) return 0; + + policy = stackargs_get_remap_policy( &state->s, "WINEPRELOADREMAPSTACK", REMAP_POLICY_DEFAULT_STACK ); + if (policy == REMAP_POLICY_SKIP) goto remove_from_reserve; + if (policy != REMAP_POLICY_FORCE && + find_preload_reserved_area( (void *)stack_start, stack_size ) < 0) return 0; + + new_stack_base = wld_mmap( NULL, stack_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0 ); + if (new_stack_base == (void *)-1) goto remove_from_reserve; + + new_stack = (void *)((unsigned long)new_stack_base + ((unsigned long)state->s.stack - stack_start)); + copy_stackargs( &newinfo, &state->s, new_stack, (void *)((unsigned long)new_stack_base + stack_size) ); + + wld_memmove( &state->s, &newinfo, sizeof(state->s) ); + + free_vma_list( vma_list ); + alloc_scan_vma( vma_list, state->s.stack ); + return 1; + +remove_from_reserve: + while ((i = find_preload_reserved_area( (void *)stack_start, stack_size )) >= 0) + remove_preload_range( i ); + return -1; +} + +static void map_reserve_preload_ranges( struct vma_area_list *vma_list ) { size_t i; for (i = 0; preload_info[i].size; i++) { - if ((char *)exclude_end >= (char *)preload_info[i].addr && - (char *)exclude_start <= (char *)preload_info[i].addr + preload_info[i].size) - { - remove_preload_range( i ); - i--; - } - else if (map_reserve_unmapped_range( vma_list, preload_info[i].addr, preload_info[i].size ) < 0) + if (map_reserve_unmapped_range( vma_list, preload_info[i].addr, preload_info[i].size ) < 0) { /* don't warn for low 64k */ if (preload_info[i].addr 
>= (void *)0x10000 @@ -1996,6 +2083,7 @@ void* wld_start( void **stack ) struct wine_preload_info **wine_main_preload_info; struct preloader_state state = { 0 }; struct vma_area_list vma_list = { NULL }; + int remap_done;
parse_stackargs( &state.s, *stack );
@@ -2024,10 +2112,13 @@ void* wld_start( void **stack ) reserve = stackargs_getenv( &state.s, "WINEPRELOADRESERVE" ); if (reserve) preload_reserve( reserve );
- alloc_scan_vma( &vma_list ); - map_reserve_preload_ranges( &vma_list, state.s.stack, state.s.auxv ); + alloc_scan_vma( &vma_list, state.s.stack ); + map_reserve_preload_ranges( &vma_list );
- if (remap_vdso( &vma_list, &state ) > 0) map_reserve_preload_ranges( &vma_list, state.s.stack, state.s.auxv ); + remap_done = 0; + remap_done |= remap_vdso( &vma_list, &state ) > 0; + remap_done |= remap_stack( &vma_list, &state) > 0; + if (remap_done) map_reserve_preload_ranges( &vma_list );
/* add an executable page at the top of the address space to defeat * broken no-exec protections that play with the code selector limit */
Signed-off-by: Jinoh Kang jinoh.kang.kr@gmail.com --- loader/preloader.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/loader/preloader.c b/loader/preloader.c index 6c4c4c806bb..fbb3c0d3202 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -949,6 +949,11 @@ static void dump_auxiliary( struct wld_auxv *av ) NAME(AT_SYSINFO), NAME(AT_SYSINFO_EHDR), NAME(AT_UID), + NAME(AT_SECURE), + NAME(AT_RANDOM), + NAME(AT_HWCAP2), + NAME(AT_EXECFN), + NAME(AT_MINSIGSTKSZ), { 0, NULL } }; #undef NAME
December 29, 2021 12:06 PM, "Jinoh Kang" jinoh.kang.kr@gmail.com wrote:
diff --git a/loader/preloader.c b/loader/preloader.c index 585be50624f..cb20afd4d5a 100644 --- a/loader/preloader.c +++ b/loader/preloader.c @@ -236,7 +236,7 @@ __ASM_GLOBAL_FUNC(_start,
/* wrappers for Linux system calls */
-#define SYSCALL_RET(ret) (((ret) < 0 && (ret) > -4096) ? -1 : (ret)) +#define SYSCALL_RET(ret) (((unsigned long)(ret) > -4096UL) ? -1 : (ret))
This seems wrong. It actually happens to be correct according to the rules of 2's-complement arithmetic (-4096 == 0xFFFFF000, -1 == 0xFFFFFFFF), but that's not obvious looking at the code. I had to work that out to verify this. That doesn't sit well with me, and I suspect it won't sit well with Alexandre, either.
static inline __attribute__((noreturn)) void wld_exit( int code ) {
Chip