Currently usr1_handler always delivers the context to server. With AVX enabled the single context size is around 1k (while there are two of those for wow64). I am now working on a more generic xstate support in contexts (mostly motivated by AVX512), with AVX512 the single context to be transferred to server is ~1k bigger.
The context is needed to be passed to the server from usr1_handler only for NtGetContextThread, NtSetContextThread and NtSuspendThread handling (e. g., when stop_thread is called on the server). The vast majority of usr1_handler usage is for kernel APCs (e. g., APC_ASYNC_IO involved in every async operation) that don't need the thread context transferred to the server and back.
My measurements of single SERVER_START_REQ( select ) in server_select() shows that the turnaround time of the request with the context (on native x64 without wow context) is almost two times bigger on average when currently supported AVX context is involved (that is, on every usr1_handle invocation on a machine supporting AVX). So, this patch is expected to increase the time of relatively rare calls which actually need contexts by roughly 50% but decrease the turnaround time of frequent calls involving system APCs by 50%. The difference will be more in favour of this patch once huge AVX512 contexts are added.
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/server.c | 7 ++++--- dlls/ntdll/unix/signal_arm.c | 2 ++ dlls/ntdll/unix/signal_arm64.c | 2 ++ dlls/ntdll/unix/signal_i386.c | 2 ++ dlls/ntdll/unix/signal_x86_64.c | 2 ++ dlls/ntdll/unix/thread.c | 12 +++++++----- dlls/ntdll/unix/unix_private.h | 2 +- server/protocol.def | 1 + server/thread.c | 7 +++++++ 9 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/dlls/ntdll/unix/server.c b/dlls/ntdll/unix/server.c index 69e6567c911..f96a08e40b2 100644 --- a/dlls/ntdll/unix/server.c +++ b/dlls/ntdll/unix/server.c @@ -693,7 +693,6 @@ unsigned int server_select( const select_op_t *select_op, data_size_t size, UINT unsigned int ret; int cookie; obj_handle_t apc_handle = 0; - BOOL suspend_context = !!context; apc_result_t result; sigset_t old_set; int signaled; @@ -704,6 +703,8 @@ unsigned int server_select( const select_op_t *select_op, data_size_t size, UINT context_t context[2]; } reply_data;
+ assert(!context || (flags & SELECT_SUSPEND)); + memset( &result, 0, sizeof(result) );
do @@ -720,11 +721,11 @@ unsigned int server_select( const select_op_t *select_op, data_size_t size, UINT req->size = size; wine_server_add_data( req, &result, sizeof(result) ); wine_server_add_data( req, select_op, size ); - if (suspend_context) + if (context && flags & SELECT_SUSPEND) { data_size_t ctx_size = (context[1].machine ? 2 : 1) * sizeof(*context); wine_server_add_data( req, context, ctx_size ); - suspend_context = FALSE; /* server owns the context now */ + flags &= ~SELECT_SUSPEND; /* server owns the context now */ } wine_server_set_reply( req, &reply_data, context ? sizeof(reply_data) : sizeof(reply_data.call) ); diff --git a/dlls/ntdll/unix/signal_arm.c b/dlls/ntdll/unix/signal_arm.c index 5115fa7ec3a..46f8d93cdb4 100644 --- a/dlls/ntdll/unix/signal_arm.c +++ b/dlls/ntdll/unix/signal_arm.c @@ -1518,6 +1518,8 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) { CONTEXT context;
+ if (wait_suspend( NULL ) != STATUS_MORE_PROCESSING_REQUIRED) return; + if (is_inside_syscall( sigcontext )) { context.ContextFlags = CONTEXT_FULL; diff --git a/dlls/ntdll/unix/signal_arm64.c b/dlls/ntdll/unix/signal_arm64.c index f96ec330796..ac8d23f918e 100644 --- a/dlls/ntdll/unix/signal_arm64.c +++ b/dlls/ntdll/unix/signal_arm64.c @@ -1485,6 +1485,8 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) { CONTEXT context;
+ if (wait_suspend( NULL ) != STATUS_MORE_PROCESSING_REQUIRED) return; + if (is_inside_syscall( sigcontext )) { context.ContextFlags = CONTEXT_FULL; diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 751b0081534..f7ee66b926c 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2129,6 +2129,8 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) struct xcontext xcontext;
init_handler( sigcontext ); + if (wait_suspend( NULL ) != STATUS_MORE_PROCESSING_REQUIRED) return; + if (is_inside_syscall( sigcontext )) { DECLSPEC_ALIGN(64) XSTATE xs; diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 53827629af4..e8f8cda53d4 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -2151,6 +2151,8 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) ucontext_t *ucontext = init_handler( sigcontext ); struct xcontext context;
+ if (wait_suspend( NULL ) != STATUS_MORE_PROCESSING_REQUIRED) return; + if (is_inside_syscall( ucontext )) { DECLSPEC_ALIGN(64) XSTATE xs; diff --git a/dlls/ntdll/unix/thread.c b/dlls/ntdll/unix/thread.c index 9e84ec3cc96..c67e29e47fd 100644 --- a/dlls/ntdll/unix/thread.c +++ b/dlls/ntdll/unix/thread.c @@ -1458,16 +1458,18 @@ void exit_process( int status ) * * Wait until the thread is no longer suspended. */ -void wait_suspend( CONTEXT *context ) +NTSTATUS wait_suspend( CONTEXT *context ) { int saved_errno = errno; context_t server_contexts[2]; + NTSTATUS status;
- contexts_to_server( server_contexts, context ); + if (context) contexts_to_server( server_contexts, context ); /* wait with 0 timeout, will only return once the thread is no longer suspended */ - server_select( NULL, 0, SELECT_INTERRUPTIBLE, 0, server_contexts, NULL ); - contexts_from_server( context, server_contexts ); + status = server_select( NULL, 0, SELECT_INTERRUPTIBLE | SELECT_SUSPEND, 0, context ? server_contexts : NULL, NULL ); + if (context) contexts_from_server( context, server_contexts ); errno = saved_errno; + return status; }
@@ -1513,7 +1515,7 @@ NTSTATUS send_debug_event( EXCEPTION_RECORD *rec, CONTEXT *context, BOOL first_c select_op.wait.handles[0] = handle;
contexts_to_server( server_contexts, context ); - server_select( &select_op, offsetof( select_op_t, wait.handles[1] ), SELECT_INTERRUPTIBLE, + server_select( &select_op, offsetof( select_op_t, wait.handles[1] ), SELECT_INTERRUPTIBLE | SELECT_SUSPEND, TIMEOUT_INFINITE, server_contexts, NULL );
SERVER_START_REQ( get_exception_status ) diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index 07f2724eac7..1c8016e6471 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -218,7 +218,7 @@ extern NTSTATUS init_thread_stack( TEB *teb, ULONG_PTR limit, SIZE_T reserve_siz extern void DECLSPEC_NORETURN abort_thread( int status ); extern void DECLSPEC_NORETURN abort_process( int status ); extern void DECLSPEC_NORETURN exit_process( int status ); -extern void wait_suspend( CONTEXT *context ); +extern NTSTATUS wait_suspend( CONTEXT *context ); extern NTSTATUS send_debug_event( EXCEPTION_RECORD *rec, CONTEXT *context, BOOL first_chance ); extern NTSTATUS set_thread_context( HANDLE handle, const void *context, BOOL *self, USHORT machine ); extern NTSTATUS get_thread_context( HANDLE handle, void *context, BOOL *self, USHORT machine ); diff --git a/server/protocol.def b/server/protocol.def index 5d60e7fcda3..86019a1ad32 100644 --- a/server/protocol.def +++ b/server/protocol.def @@ -1237,6 +1237,7 @@ typedef struct @END #define SELECT_ALERTABLE 1 #define SELECT_INTERRUPTIBLE 2 +#define SELECT_SUSPEND 4
/* Create an event */ diff --git a/server/thread.c b/server/thread.c index 56f57cefd8f..8fcf192c20c 100644 --- a/server/thread.c +++ b/server/thread.c @@ -1629,6 +1629,13 @@ DECL_HANDLER(select) current->suspend_cookie = req->cookie; wake_up( &ctx->obj, 0 ); } + else if (current->context && req->flags & SELECT_SUSPEND) + { + /* select is called from signal and thread context is required. */ + set_error( STATUS_MORE_PROCESSING_REQUIRED ); + reply->signaled = 1; + return; + }
if (!req->cookie) goto invalid_param;
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=142267
Your paranoid android.
=== debian11b (64 bit WoW report) ===
kernel32: sync.c:2726: Test failed: expected WAIT_OBJECT_0, got 258
mf: Unhandled exception: divide by zero in 64-bit code (0x0000000042e22c).