Signed-off-by: Paul Gofman pgofman@codeweavers.com --- All the native Linux calls are executed on the kernel stack now and 128k is very small. The known problems in multiple games are for Vulkan pipeline creation calls which require a large enough stack. The exact stack requirements are not known for either Mesa or Nvidia, but it is believed that 1MB should suffice.
dlls/ntdll/unix/unix_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index 86e87e6e29a..795fc148479 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -84,7 +84,7 @@ static const SIZE_T page_size = 0x1000; static const SIZE_T teb_size = 0x3800; /* TEB64 + TEB32 + debug info */ static const SIZE_T signal_stack_mask = 0xffff; static const SIZE_T signal_stack_size = 0x10000 - 0x3800; -static const SIZE_T kernel_stack_size = 0x20000; +static const SIZE_T kernel_stack_size = 0x100000; static const SIZE_T min_kernel_stack = 0x2000; static const LONG teb_offset = 0x2000;
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/unix/thread.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c index b8546edde87..503230e4634 100644 --- a/dlls/ntdll/unix/thread.c +++ b/dlls/ntdll/unix/thread.c @@ -71,10 +71,6 @@ WINE_DEFAULT_DEBUG_CHANNEL(thread); WINE_DECLARE_DEBUG_CHANNEL(seh);
-#ifndef PTHREAD_STACK_MIN -#define PTHREAD_STACK_MIN 16384 -#endif - static int nb_threads = 1;
static inline int get_unix_exit_code( NTSTATUS status )
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/unix/thread.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c index 503230e4634..0f754493fdb 100644 --- a/dlls/ntdll/unix/thread.c +++ b/dlls/ntdll/unix/thread.c @@ -1339,8 +1339,7 @@ NTSTATUS WINAPI NtCreateThreadEx( HANDLE *handle, ACCESS_MASK access, OBJECT_ATT thread_data->param = param;
pthread_attr_init( &pthread_attr ); - pthread_attr_setstack( &pthread_attr, teb->DeallocationStack, - (char *)thread_data->kernel_stack + kernel_stack_size - (char *)teb->DeallocationStack ); + pthread_attr_setstack( &pthread_attr, thread_data->kernel_stack, kernel_stack_size ); pthread_attr_setguardsize( &pthread_attr, 0 ); pthread_attr_setscope( &pthread_attr, PTHREAD_SCOPE_SYSTEM ); /* force creating a kernel thread */ InterlockedIncrement( &nb_threads );
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/unix/signal_arm.c | 4 ++++ dlls/ntdll/unix/signal_arm64.c | 4 ++++ dlls/ntdll/unix/signal_i386.c | 4 ++++ dlls/ntdll/unix/signal_x86_64.c | 4 ++++ dlls/ntdll/unix/thread.c | 3 ++- dlls/ntdll/unix/unix_private.h | 10 +++++++++- dlls/ntdll/unix/virtual.c | 5 +++++ 7 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/unix/signal_arm.c b/dlls/ntdll/unix/signal_arm.c index 1fea76f6563..1c03b707890 100644 --- a/dlls/ntdll/unix/signal_arm.c +++ b/dlls/ntdll/unix/signal_arm.c @@ -780,6 +780,10 @@ static BOOL handle_syscall_fault( ucontext_t *context, EXCEPTION_RECORD *rec ) (DWORD)IP_sig(context), (DWORD)SP_sig(context), (DWORD)LR_sig(context), (DWORD)PC_sig(context), (DWORD)CPSR_sig(context) );
+ if (rec->ExceptionCode == STATUS_ACCESS_VIOLATION + && is_inside_syscall_stack_guard( (char *)rec->ExceptionInformation[1] )) + ERR_(seh)( "Syscall stack overrun.\n "); + if (ntdll_get_thread_data()->jmp_buf) { TRACE( "returning to handler\n" ); diff --git a/dlls/ntdll/unix/signal_arm64.c b/dlls/ntdll/unix/signal_arm64.c index 1df97f16f13..917351cb6a8 100644 --- a/dlls/ntdll/unix/signal_arm64.c +++ b/dlls/ntdll/unix/signal_arm64.c @@ -834,6 +834,10 @@ static BOOL handle_syscall_fault( ucontext_t *context, EXCEPTION_RECORD *rec ) (DWORD64)REGn_sig(28, context), (DWORD64)FP_sig(context), (DWORD64)LR_sig(context), (DWORD64)SP_sig(context) );
+ if (rec->ExceptionCode == STATUS_ACCESS_VIOLATION + && is_inside_syscall_stack_guard( (char *)rec->ExceptionInformation[1] )) + ERR_(seh)( "Syscall stack overrun.\n "); + if (ntdll_get_thread_data()->jmp_buf) { TRACE( "returning to handler\n" ); diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index e2a6148d609..28c45a512d0 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -1731,6 +1731,10 @@ static BOOL handle_syscall_fault( ucontext_t *sigcontext, void *stack_ptr, context->Ebp, context->Esp, context->SegCs, context->SegDs, context->SegEs, context->SegFs, context->SegGs, context->EFlags );
+ if (rec->ExceptionCode == STATUS_ACCESS_VIOLATION + && is_inside_syscall_stack_guard( (char *)rec->ExceptionInformation[1] )) + ERR_(seh)( "Syscall stack overrun.\n "); + if (ntdll_get_thread_data()->jmp_buf) { TRACE( "returning to handler\n" ); diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 34334f72ff0..54788c65958 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2540,6 +2540,10 @@ static BOOL handle_syscall_fault( ucontext_t *sigcontext, EXCEPTION_RECORD *rec, TRACE_(seh)( " r12=%016lx r13=%016lx r14=%016lx r15=%016lx\n", context->R12, context->R13, context->R14, context->R15 );
+ if (rec->ExceptionCode == STATUS_ACCESS_VIOLATION + && is_inside_syscall_stack_guard( (char *)rec->ExceptionInformation[1] )) + ERR_(seh)( "Syscall stack overrun.\n "); + if (ntdll_get_thread_data()->jmp_buf) { TRACE_(seh)( "returning to handler\n" ); diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c index 0f754493fdb..6a5dca0f060 100644 --- a/dlls/ntdll/unix/thread.c +++ b/dlls/ntdll/unix/thread.c @@ -1339,7 +1339,8 @@ NTSTATUS WINAPI NtCreateThreadEx( HANDLE *handle, ACCESS_MASK access, OBJECT_ATT thread_data->param = param;
pthread_attr_init( &pthread_attr ); - pthread_attr_setstack( &pthread_attr, thread_data->kernel_stack, kernel_stack_size ); + pthread_attr_setstack( &pthread_attr, (char *)thread_data->kernel_stack + kernel_stack_guard_size, + kernel_stack_size - kernel_stack_guard_size ); pthread_attr_setguardsize( &pthread_attr, 0 ); pthread_attr_setscope( &pthread_attr, PTHREAD_SCOPE_SYSTEM ); /* force creating a kernel thread */ InterlockedIncrement( &nb_threads ); diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index 795fc148479..f805147048d 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -85,7 +85,8 @@ static const SIZE_T teb_size = 0x3800; /* TEB64 + TEB32 + debug info */ static const SIZE_T signal_stack_mask = 0xffff; static const SIZE_T signal_stack_size = 0x10000 - 0x3800; static const SIZE_T kernel_stack_size = 0x100000; -static const SIZE_T min_kernel_stack = 0x2000; +static const SIZE_T kernel_stack_guard_size = 0x1000; +static const SIZE_T min_kernel_stack = 0x3000; static const LONG teb_offset = 0x2000;
#define FILE_WRITE_TO_END_OF_FILE ((LONGLONG)-1) @@ -319,6 +320,13 @@ static inline BOOL is_inside_signal_stack( void *ptr ) (char *)ptr < (char *)get_signal_stack() + signal_stack_size); }
+static inline BOOL is_inside_syscall_stack_guard( const char *stack_ptr ) +{ + const char *kernel_stack = ntdll_get_thread_data()->kernel_stack; + + return (stack_ptr >= kernel_stack && stack_ptr < kernel_stack + kernel_stack_guard_size); +} + static inline void mutex_lock( pthread_mutex_t *mutex ) { if (!process_exiting) pthread_mutex_lock( mutex ); diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 94b300c5057..621e7be996d 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -3100,6 +3100,7 @@ NTSTATUS virtual_alloc_thread_stack( INITIAL_TEB *stack, ULONG_PTR zero_bits, SI SIZE_T commit_size, SIZE_T extra_size ) { struct file_view *view; + char *kernel_stack; NTSTATUS status; sigset_t sigset; SIZE_T size; @@ -3143,6 +3144,10 @@ NTSTATUS virtual_alloc_thread_stack( INITIAL_TEB *stack, ULONG_PTR zero_bits, SI delete_view( view ); goto done; } + /* setup kernel stack no access guard page */ + kernel_stack = (char *)view->base + view->size; + set_page_vprot( kernel_stack, kernel_stack_guard_size, VPROT_COMMITTED ); + mprotect_range( kernel_stack, kernel_stack_guard_size, 0, 0 ); }
/* note: limit is lower than base since the stack grows down */
On Mon, Apr 11, 2022, 7:27 PM Paul Gofman pgofman@codeweavers.com wrote:
Signed-off-by: Paul Gofman pgofman@codeweavers.com
dlls/ntdll/unix/signal_arm.c | 4 ++++ dlls/ntdll/unix/signal_arm64.c | 4 ++++ dlls/ntdll/unix/signal_i386.c | 4 ++++ dlls/ntdll/unix/signal_x86_64.c | 4 ++++ dlls/ntdll/unix/thread.c | 3 ++- dlls/ntdll/unix/unix_private.h | 10 +++++++++- dlls/ntdll/unix/virtual.c | 5 +++++ 7 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/unix/signal_arm.c b/dlls/ntdll/unix/signal_arm.c index 1fea76f6563..1c03b707890 100644 --- a/dlls/ntdll/unix/signal_arm.c +++ b/dlls/ntdll/unix/signal_arm.c @@ -780,6 +780,10 @@ static BOOL handle_syscall_fault( ucontext_t *context, EXCEPTION_RECORD *rec ) (DWORD)IP_sig(context), (DWORD)SP_sig(context), (DWORD)LR_sig(context), (DWORD)PC_sig(context), (DWORD)CPSR_sig(context) );
- if (rec->ExceptionCode == STATUS_ACCESS_VIOLATION
&& is_inside_syscall_stack_guard( (char
*)rec->ExceptionInformation[1] ))
ERR_(seh)( "Syscall stack overrun.\n ");
Would it be a good idea to let it grow instead? Instead of, or in addition to, the increased 1M stack limit.
I don’t think this complication is worth it. What we would like to save is address space on 32 bit, but we can’t (before running 32 on 64 of course where this stack will be on 64 only). We apparently need to reserve address space for stack at once, so it has the space to grow, and then growing would only mean saving committed pages.
On 12 Apr 2022, at 04:23, Jin-oh Kang jinoh.kang.kr@gmail.com wrote:
Would it be a good idea to let it grow instead? Instead of, or in addition to, the increased 1M stack limit.
On Tue, Apr 12, 2022, 5:59 PM Paul Gofman pgofman@codeweavers.com wrote:
I don’t think this complication is worth it. What we would like to save is address space on 32 bit, but we can’t (before running 32 on 64 of course where this stack will be on 64 only). We apparently need to reserve address space for stack at once, so it has the space to grow, and then growing would only mean saving committed pages.
Maybe allocate extra stack only for function calls that might need it? Something like this: https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntddk/nf-ntddk...
On Sun, Apr 17, 2022, 3:59 AM Jin-oh Kang jinoh.kang.kr@gmail.com wrote:
On Tue, Apr 12, 2022, 5:59 PM Paul Gofman pgofman@codeweavers.com wrote:
I don’t think this complication is worth it. What we would like to save is address space on 32 bit, but we can’t (before running 32 on 64 of course where this stack will be on 64 only). We apparently need to reserve address space for stack at once, so it has the space to grow, and then growing would only mean saving committed pages.
Maybe allocate extra stack only for function calls that might need it? Something like this: https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntddk/nf-ntddk...
There's also https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/i386/morestack.S, which is used by gcc -fsplit-stack, but we might need something more portable.
On 4/16/22 22:06, Jin-oh Kang wrote:
On Sun, Apr 17, 2022, 3:59 AM Jin-oh Kang jinoh.kang.kr@gmail.com wrote:
On Tue, Apr 12, 2022, 5:59 PM Paul Gofman <pgofman@codeweavers.com> wrote: I don’t think this complication worth it. What we would like to save is address space on 32 bit, but we can’t (before running 32 on 64 of course where this stack will be on 64 only). We apparently need to reserve address space for stack at once, so it has the space to grow, and then growing would only mean saving committed pages. Maybe allocate extra stack only for function calls that might need it? Something like this: https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntddk/nf-ntddk-keexpandkernelstackandcallout
There's also https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/i386/morestack.S, which is used by gcc -fsplit-stack, but we might need something more portable.
Will that work with existing host libraries which actually use the stack and are supposed to be compatible with these split-stack mechanics if we do something like that?
On 4/16/22 21:59, Jin-oh Kang wrote:
On Tue, Apr 12, 2022, 5:59 PM Paul Gofman pgofman@codeweavers.com wrote:
I don’t think this complication is worth it. What we would like to save is address space on 32 bit, but we can’t (before running 32 on 64 of course where this stack will be on 64 only). We apparently need to reserve address space for stack at once, so it has the space to grow, and then growing would only mean saving committed pages.
Maybe allocate extra stack only for function calls that might need it? Something like this: https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntddk/nf-ntddk...
That can technically be made to work, but to do it we'd need to:
- maintain some pool of larger thread kernel stacks;
- make a secondary switch to it in wine_unix_call or whenever we think we need a larger stack (as apparently it is not known in wine_syscall_dispatcher);
- hunt the functions requiring larger stack one by one.
All this together (and especially the last point) sounds essentially like a no-go to me. Mind also that in the long term, when 32 on 64 is on by default, the syscall stack will exist on 64 only.
On 4/18/22 20:54, Paul Gofman wrote:
On 4/16/22 21:59, Jin-oh Kang wrote:
On Tue, Apr 12, 2022, 5:59 PM Paul Gofman pgofman@codeweavers.com wrote:
I don’t think this complication is worth it. What we would like to save is address space on 32 bit, but we can’t (before running 32 on 64 of course where this stack will be on 64 only). We apparently need to reserve address space for stack at once, so it has the space to grow, and then growing would only mean saving committed pages.
Maybe allocate extra stack only for function calls that might need it? Something like this: https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntddk/nf-ntddk...
That can technically be made to work, but to do it we'd need to:
maintain some pool of larger thread kernel stacks;
make a secondary switch to it in wine_unix_call or whenever we think we need a larger stack (as apparently it is not known in wine_syscall_dispatcher);
hunt the functions requiring larger stack one by one.
All this together (and especially the last point) sounds essentially like a no-go to me. Mind also that in the long term, when 32 on 64 is on by default, the syscall stack will exist on 64 only.
If we're de-prioritizing pure 32-bit OS support, well, then my argument is kind of moot.