The IP_TOS control data is 1 byte on Unix, but 4 bytes on Windows. Properly zero-extend the value instead of copying 3 bytes of garbage.
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
---
 dlls/ntdll/unix/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/dlls/ntdll/unix/socket.c b/dlls/ntdll/unix/socket.c
index f82c7ae1ddf..71dfcdd1114 100644
--- a/dlls/ntdll/unix/socket.c
+++ b/dlls/ntdll/unix/socket.c
@@ -429,8 +429,9 @@ static int convert_control_headers(struct msghdr *hdr, WSABUF *control)
 #if defined(IP_TOS)
                 case IP_TOS:
                 {
+                    INT tos = *(unsigned char *)CMSG_DATA(cmsg_unix);
                     ptr = fill_control_message( WS_IPPROTO_IP, WS_IP_TOS, ptr, &ctlsize,
-                                                CMSG_DATA(cmsg_unix), sizeof(INT) );
+                                                &tos, sizeof(INT) );
                     if (!ptr) goto error;
                     break;
                 }
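For illustration, a minimal standalone sketch (not Wine code) of what the fix does: the Unix control message carries a single TOS byte, while the Windows-side control data expects a 4-byte integer, so the byte has to be zero-extended rather than copied along with 3 trailing garbage bytes.

#include <stdio.h>

int main(void)
{
    unsigned char cmsg_data[1] = { 0x48 };  /* 1-byte TOS value as found in the Unix cmsg */
    int tos;

    /* Wrong: memcpy(&tos, cmsg_data, sizeof(tos)) would read 3 bytes past the field. */

    /* Right: zero-extend the single byte into a full-width integer. */
    tos = cmsg_data[0];     /* same effect as the *(unsigned char *)CMSG_DATA(cmsg_unix) cast in the patch */
    printf("TOS = %d\n", tos);  /* always 72, independent of adjacent memory contents */
    return 0;
}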
This select type is not used to emulate any NtWait* system call on Windows; instead, it is intended to be used in place of async_wait() to set the async result and "wait" for the async object in one server call.
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
---
 dlls/ntdll/unix/server.c       |  2 ++
 dlls/ntdll/unix/sync.c         | 14 ++++++++++++++
 dlls/ntdll/unix/unix_private.h |  1 +
 server/async.c                 | 13 ++++++++++---
 server/object.h                |  4 ++++
 server/protocol.def            | 11 ++++++++++-
 server/thread.c                | 10 ++++++++++
 7 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/dlls/ntdll/unix/server.c b/dlls/ntdll/unix/server.c
index 9d0594d3374..100bf943292 100644
--- a/dlls/ntdll/unix/server.c
+++ b/dlls/ntdll/unix/server.c
@@ -632,6 +632,8 @@ unsigned int server_select( const select_op_t *select_op, data_size_t size, UINT
             /* don't signal multiple times */
             if (size >= sizeof(select_op->signal_and_wait) && select_op->op == SELECT_SIGNAL_AND_WAIT)
                 size = offsetof( select_op_t, signal_and_wait.signal );
+            if (size >= sizeof(select_op->signal_wait_async) && select_op->op == SELECT_SIGNAL_WAIT_ASYNC )
+                size = offsetof( select_op_t, signal_wait_async.signal );
         }
         pthread_sigmask( SIG_SETMASK, &old_set, NULL );
         if (signaled) break;
diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c
index 442243d8bcf..3b206a8f0f2 100644
--- a/dlls/ntdll/unix/sync.c
+++ b/dlls/ntdll/unix/sync.c
@@ -2510,3 +2510,17 @@ NTSTATUS WINAPI NtWaitForAlertByThreadId( const void *address, const LARGE_INTEG
 }
 
 #endif
+
+NTSTATUS signal_wait_async( HANDLE handle, BOOL alertable, BOOL signal, NTSTATUS status, ULONG_PTR information )
+{
+    select_op_t select_op;
+    UINT flags = SELECT_INTERRUPTIBLE;
+
+    if (alertable) flags |= SELECT_ALERTABLE;
+    select_op.signal_wait_async.op = SELECT_SIGNAL_WAIT_ASYNC;
+    select_op.signal_wait_async.handle = wine_server_obj_handle( handle );
+    select_op.signal_wait_async.signal = signal;
+    select_op.signal_wait_async.status = status;
+    select_op.signal_wait_async.total = information;
+    return server_wait( &select_op, sizeof(select_op.signal_wait_async), flags, NULL );
+}
diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h
index a79edabc37c..5455521c1a0 100644
--- a/dlls/ntdll/unix/unix_private.h
+++ b/dlls/ntdll/unix/unix_private.h
@@ -274,6 +274,7 @@ extern NTSTATUS get_device_info( int fd, struct _FILE_FS_DEVICE_INFORMATION *inf
 extern void init_files(void) DECLSPEC_HIDDEN;
 extern void init_cpu_info(void) DECLSPEC_HIDDEN;
 extern void add_completion( HANDLE handle, ULONG_PTR value, NTSTATUS status, ULONG info, BOOL async ) DECLSPEC_HIDDEN;
+extern NTSTATUS signal_wait_async( HANDLE handle, BOOL alertable, BOOL signal, NTSTATUS status, ULONG_PTR information );
extern void dbg_init(void) DECLSPEC_HIDDEN;
diff --git a/server/async.c b/server/async.c
index 7aef28355f0..339d050b086 100644
--- a/server/async.c
+++ b/server/async.c
@@ -98,6 +98,11 @@ static inline void async_reselect( struct async *async )
     if (async->queue && async->fd) fd_reselect_async( async->fd, async->queue );
 }
 
+struct async *get_async_obj( struct process *process, obj_handle_t handle, unsigned int access )
+{
+    return (struct async *)get_handle_obj( process, handle, access, &async_ops );
+}
+
 static void async_dump( struct object *obj, int verbose )
 {
     struct async *async = (struct async *)obj;
@@ -121,10 +126,7 @@ static void async_satisfied( struct object *obj, struct wait_queue_entry *entry
     assert( async->iosb );
 
     if (async->direct_result)
-    {
         async_set_result( &async->obj, async->iosb->status, async->iosb->result );
-        async->direct_result = 0;
-    }
 
     if (async->initial_status == STATUS_PENDING && async->blocking)
         set_wait_status( entry, async->iosb->status );
@@ -460,6 +462,11 @@ void async_set_result( struct object *obj, unsigned int status, apc_param_t tota
 
     assert( async->terminated );  /* it must have been woken up if we get a result */
 
+    /* If status == STATUS_PENDING, future results will be delivered via APC.
+     * If status != STATUS_PENDING, this prevents async_satisfied() from
+     * overriding I/O result set by SELECT_SIGNAL_WAIT_ASYNC. */
+    async->direct_result = 0;
+
     if (async->alerted && status == STATUS_PENDING)  /* restart it */
     {
         async->terminated = 0;
diff --git a/server/object.h b/server/object.h
index f156f1d2f13..8645a00f73a 100644
--- a/server/object.h
+++ b/server/object.h
@@ -288,6 +288,10 @@ extern struct object *create_symlink( struct object *root, const struct unicode_
                                       unsigned int attr, const struct unicode_str *target,
                                       const struct security_descriptor *sd );
 
+/* async I/O functions */
+
+extern struct async *get_async_obj( struct process *process, obj_handle_t handle, unsigned int access );
+
 /* global variables */
 
 /* command-line options */
diff --git a/server/protocol.def b/server/protocol.def
index db73f0418a9..ae4729a9e56 100644
--- a/server/protocol.def
+++ b/server/protocol.def
@@ -446,7 +446,8 @@ enum select_op
     SELECT_WAIT_ALL,
     SELECT_SIGNAL_AND_WAIT,
     SELECT_KEYED_EVENT_WAIT,
-    SELECT_KEYED_EVENT_RELEASE
+    SELECT_KEYED_EVENT_RELEASE,
+    SELECT_SIGNAL_WAIT_ASYNC,
 };
 
 typedef union
@@ -469,6 +470,14 @@ typedef union
         obj_handle_t    handle;
         client_ptr_t    key;
     } keyed_event;
+    struct
+    {
+        enum select_op  op;       /* SELECT_SIGNAL_WAIT_ASYNC */
+        obj_handle_t    handle;
+        int             signal;   /* set to 0 to ignore status and total (e.g. on retries) */
+        unsigned int    status;   /* new status of async operation */
+        apc_param_t     total;    /* bytes transferred */
+    } signal_wait_async;
 } select_op_t;
 
 enum apc_type
diff --git a/server/thread.c b/server/thread.c
index 467ccd1f0db..8eb473f2c58 100644
--- a/server/thread.c
+++ b/server/thread.c
@@ -1029,6 +1029,16 @@ static int select_on( const select_op_t *select_op, data_size_t op_size, client_
         current->wait->key = select_op->keyed_event.key;
         break;
 
+    case SELECT_SIGNAL_WAIT_ASYNC:
+        object = (struct object *)get_async_obj( current->process, select_op->signal_wait_async.handle, SYNCHRONIZE );
+        if (!object) return 1;
+        if (select_op->signal_wait_async.signal)
+            async_set_result( object, select_op->signal_wait_async.status, select_op->signal_wait_async.total );
+        ret = wait_on( select_op, 1, &object, flags, when );
+        release_object( object );
+        if (!ret) return 1;
+        break;
+
     default:
         set_error( STATUS_INVALID_PARAMETER );
         return 1;
If the server detects that an I/O request could be completed immediately (e.g. reading from a socket that already has incoming data), it can now return STATUS_ALERTED to allow opportunistic synchronous I/O. The client will then attempt to perform the I/O in non-blocking mode and report the I/O status back to the server with signal_wait_async().
If the operation returns e.g. EAGAIN or EWOULDBLOCK, the client can opt to either abandon the request (by specifying an error status) or poll for it in the server as usual (by specifying STATUS_PENDING).
Without this mechanism the client cannot safely perform immediately satiable I/O operations synchronously, since it can potentially conflict with other pending I/O operations that have already been queued.
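For clarity, a self-contained POSIX sketch of the client-side step described above; this is not the actual ntdll code, only an illustration of how a non-blocking attempt maps onto the outcomes mentioned in the previous paragraphs (success, re-queue with STATUS_PENDING, or abandon with an error):

#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>

/* Illustrative outcome values standing in for the NTSTATUS codes in the text. */
enum recv_outcome { RECV_DONE, RECV_WOULD_BLOCK, RECV_FAILED };

static enum recv_outcome try_recv_once( int fd, void *buf, size_t len, size_t *ret_len )
{
    ssize_t ret = recv( fd, buf, len, MSG_DONTWAIT );  /* never block in the client */

    if (ret >= 0)
    {
        *ret_len = ret;
        return RECV_DONE;         /* report success and the byte count back to the server */
    }
    if (errno == EAGAIN || errno == EWOULDBLOCK)
        return RECV_WOULD_BLOCK;  /* re-queue the async in the server with STATUS_PENDING */
    return RECV_FAILED;           /* abandon the request by reporting an error status */
}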
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
---
 server/async.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/server/async.c b/server/async.c
index 339d050b086..e74a141c459 100644
--- a/server/async.c
+++ b/server/async.c
@@ -128,7 +128,8 @@ static void async_satisfied( struct object *obj, struct wait_queue_entry *entry
     if (async->direct_result)
         async_set_result( &async->obj, async->iosb->status, async->iosb->result );
 
-    if (async->initial_status == STATUS_PENDING && async->blocking)
+    if ((async->initial_status == STATUS_PENDING && async->blocking) ||
+        async->initial_status == STATUS_ALERTED)
         set_wait_status( entry, async->iosb->status );
     else
         set_wait_status( entry, async->initial_status );
@@ -471,6 +472,17 @@ void async_set_result( struct object *obj, unsigned int status, apc_param_t tota
     {
         async->terminated = 0;
         async->alerted = 0;
+
+        /* If the client attempted to complete synchronously and failed,
+         * then it would have called signal_wait_async() to restart the
+         * operation in the full asynchronous mode.  In this case, we set
+         * the pending flag so that the completion port notification and
+         * APC call will be triggered appropriately.  Also, the async
+         * object is currently in signaled state; unset the signaled flag
+         * if the client wants to block on this async. */
+        async->pending = 1;
+        if (async->blocking) async->signaled = 0;
+
         async_reselect( async );
     }
     else
Otherwise, try_recv() call from sock_recv() may race against try_recv() call from async_recv_proc(), shuffling the packet order.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52401
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
---
 dlls/ntdll/unix/socket.c | 38 ++++++++++++++++++-------------------
 dlls/ws2_32/tests/sock.c |  8 ++++----
 server/protocol.def      |  4 ++--
 server/sock.c            | 41 ++++++++++++++++++++++++++++------------
 4 files changed, 54 insertions(+), 37 deletions(-)
diff --git a/dlls/ntdll/unix/socket.c b/dlls/ntdll/unix/socket.c
index 71dfcdd1114..b4e4240a9b6 100644
--- a/dlls/ntdll/unix/socket.c
+++ b/dlls/ntdll/unix/socket.c
@@ -686,6 +686,7 @@ static NTSTATUS sock_recv( HANDLE handle, HANDLE event, PIO_APC_ROUTINE apc, voi
     NTSTATUS status;
     unsigned int i;
     ULONG options;
+    BOOL may_restart, alerted;
 
     if (unix_flags & MSG_OOB)
     {
@@ -736,37 +737,36 @@ static NTSTATUS sock_recv( HANDLE handle, HANDLE event, PIO_APC_ROUTINE apc, voi
         }
     }
 
-    status = try_recv( fd, async, &information );
-
-    if (status != STATUS_SUCCESS && status != STATUS_BUFFER_OVERFLOW && status != STATUS_DEVICE_NOT_READY)
-    {
-        release_fileio( &async->io );
-        return status;
-    }
-
-    if (status == STATUS_DEVICE_NOT_READY && force_async)
-        status = STATUS_PENDING;
-
     SERVER_START_REQ( recv_socket )
     {
-        req->status = status;
-        req->total  = information;
+        req->force_async = force_async;
         req->async  = server_async( handle, &async->io, event, apc, apc_user, iosb_client_ptr(io) );
         req->oob    = !!(unix_flags & MSG_OOB);
         status = wine_server_call( req );
         wait_handle = wine_server_ptr_handle( reply->wait );
         options     = reply->options;
-        if ((!NT_ERROR(status) || wait_handle) && status != STATUS_PENDING)
-        {
-            io->Status = status;
-            io->Information = information;
-        }
+        may_restart = reply->may_restart;
     }
     SERVER_END_REQ;
 
+    information = 0;
+    alerted = status == STATUS_ALERTED;
+    if (alerted)
+    {
+        status = try_recv( fd, async, &information );
+        if (status == STATUS_DEVICE_NOT_READY && may_restart)
+            status = STATUS_PENDING;
+    }
+
+    if ((!NT_ERROR(status) || wait_handle) && status != STATUS_PENDING)
+    {
+        io->Status = status;
+        io->Information = information;
+    }
+
     if (status != STATUS_PENDING) release_fileio( &async->io );
 
-    if (wait_handle) status = wait_async( wait_handle, options & FILE_SYNCHRONOUS_IO_ALERT );
+    if (wait_handle) status = signal_wait_async( wait_handle, options & FILE_SYNCHRONOUS_IO_ALERT, alerted, status, information );
     return status;
 }
diff --git a/dlls/ws2_32/tests/sock.c b/dlls/ws2_32/tests/sock.c
index 054e597b719..4199676f460 100644
--- a/dlls/ws2_32/tests/sock.c
+++ b/dlls/ws2_32/tests/sock.c
@@ -7750,8 +7750,8 @@ static void test_shutdown(void)
 
     WSASetLastError(0xdeadbeef);
     ret = recv(server, buffer, sizeof(buffer), 0);
-    todo_wine ok(ret == -1, "got %d\n", ret);
-    todo_wine ok(WSAGetLastError() == WSAESHUTDOWN, "got error %u\n", WSAGetLastError());
+    ok(ret == -1, "got %d\n", ret);
+    ok(WSAGetLastError() == WSAESHUTDOWN, "got error %u\n", WSAGetLastError());
 
     ret = send(server, "test", 5, 0);
     ok(ret == 5, "got %d\n", ret);
@@ -7845,8 +7845,8 @@ static void test_shutdown(void)
 
     WSASetLastError(0xdeadbeef);
     ret = recv(server, buffer, sizeof(buffer), 0);
-    todo_wine ok(ret == -1, "got %d\n", ret);
-    todo_wine ok(WSAGetLastError() == WSAESHUTDOWN, "got error %u\n", WSAGetLastError());
+    ok(ret == -1, "got %d\n", ret);
+    ok(WSAGetLastError() == WSAESHUTDOWN, "got error %u\n", WSAGetLastError());
 
     WSASetLastError(0xdeadbeef);
     ret = send(server, "test", 5, 0);
diff --git a/server/protocol.def b/server/protocol.def
index ae4729a9e56..d047c7d0cf2 100644
--- a/server/protocol.def
+++ b/server/protocol.def
@@ -1444,11 +1444,11 @@ enum server_fd_type
 @REQ(recv_socket)
     int oob;             /* are we receiving OOB data? */
     async_data_t async;  /* async I/O parameters */
-    unsigned int status; /* status of initial call */
-    unsigned int total;  /* number of bytes already read */
+    int force_async;     /* Force asynchronous mode? */
 @REPLY
     obj_handle_t wait;      /* handle to wait on for blocking recv */
     unsigned int options;   /* device open options */
+    int may_restart;        /* May restart async? */
 @END
diff --git a/server/sock.c b/server/sock.c
index 650e67a2e0a..5430b27a1e2 100644
--- a/server/sock.c
+++ b/server/sock.c
@@ -3396,7 +3396,8 @@ struct object *create_socket_device( struct object *root, const struct unicode_s
 DECL_HANDLER(recv_socket)
 {
     struct sock *sock = (struct sock *)get_handle_obj( current->process, req->async.handle, 0, &sock_ops );
-    unsigned int status = req->status;
+    unsigned int status = STATUS_PENDING;
+    int pending = req->force_async;
     timeout_t timeout = 0;
     struct async *async;
     struct fd *fd;
@@ -3404,8 +3405,8 @@ DECL_HANDLER(recv_socket)
     if (!sock) return;
     fd = sock->fd;
 
-    /* recv() returned EWOULDBLOCK, i.e. no data available yet */
-    if (status == STATUS_DEVICE_NOT_READY && !sock->nonblocking)
+    /* Synchronous, *blocking* I/O requested? */
+    if (!req->force_async && !sock->nonblocking)
     {
         /* Set a timeout on the async if necessary.
          *
@@ -3416,29 +3417,44 @@ DECL_HANDLER(recv_socket)
         if (is_fd_overlapped( fd ))
             timeout = (timeout_t)sock->rcvtimeo * -10000;
 
-        status = STATUS_PENDING;
+        pending = 1;
     }
 
-    if ((status == STATUS_PENDING || status == STATUS_DEVICE_NOT_READY) && sock->rd_shutdown)
+    if (status == STATUS_PENDING && sock->rd_shutdown)
         status = STATUS_PIPE_DISCONNECTED;
 
+    /* NOTE: If read_q is not empty, we cannot really tell if the already queued asyncs
+     * NOTE: will not consume all available data; if there's no data available,
+     * NOTE: the current request won't be immediately satiable. */
+    if (status == STATUS_PENDING && !async_queued( &sock->read_q ))
+    {
+        struct pollfd pollfd;
+        pollfd.fd = get_unix_fd( sock->fd );
+        pollfd.events = req->oob ? POLLPRI : POLLIN;
+        pollfd.revents = 0;
+        if (poll(&pollfd, 1, 0) >= 0 && pollfd.revents)
+        {
+            /* Give the client opportunity to complete synchronously.
+             * If it turns out that the I/O request is not actually immediately satiable,
+             * the client may then choose to re-queue the async (with STATUS_PENDING). */
+            status = STATUS_ALERTED;
+        }
+    }
+
+    if (!pending && status == STATUS_PENDING) status = STATUS_DEVICE_NOT_READY;
+    if (NT_ERROR( status )) pending = 0;
+
     sock->pending_events &= ~(req->oob ? AFD_POLL_OOB : AFD_POLL_READ);
     sock->reported_events &= ~(req->oob ? AFD_POLL_OOB : AFD_POLL_READ);
 
     if ((async = create_request_async( fd, get_fd_comp_flags( fd ), &req->async )))
     {
-        if (status == STATUS_SUCCESS)
-        {
-            struct iosb *iosb = async_get_iosb( async );
-            iosb->result = req->total;
-            release_object( iosb );
-        }
         set_error( status );
 
         if (timeout)
            async_set_timeout( async, timeout, STATUS_IO_TIMEOUT );
 
-        if (status == STATUS_PENDING)
+        if (status == STATUS_PENDING || status == STATUS_ALERTED)
             queue_async( &sock->read_q, async );
 
         /* always reselect; we changed reported_events above */
@@ -3446,6 +3462,7 @@ DECL_HANDLER(recv_socket)
 
         reply->wait = async_handoff( async, NULL, 0 );
         reply->options = get_fd_options( fd );
+        reply->may_restart = pending && status == STATUS_ALERTED;
         release_object( async );
     }
     release_object( sock );
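The core of the server-side change above is the zero-timeout readiness probe; extracted as a standalone helper for clarity, it looks roughly like this (simplified from the hunk above, not a separate API):

#include <poll.h>

/* Returns nonzero if data (or OOB data) is already queued on the socket,
 * without ever blocking inside the server. */
static int socket_ready_now( int fd, int oob )
{
    struct pollfd pfd;

    pfd.fd = fd;
    pfd.events = oob ? POLLPRI : POLLIN;
    pfd.revents = 0;
    return poll( &pfd, 1, 0 ) >= 0 && pfd.revents != 0;
}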
On 1/18/22 13:30, Jinoh Kang wrote:
Otherwise, try_recv() call from sock_recv() may race against try_recv() call from async_recv_proc(), shuffling the packet order.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52401
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
 dlls/ntdll/unix/socket.c | 38 ++++++++++++++++++-------------------
 dlls/ws2_32/tests/sock.c |  8 ++++----
 server/protocol.def      |  4 ++--
 server/sock.c            | 41 ++++++++++++++++++++++++++++------------
 4 files changed, 54 insertions(+), 37 deletions(-)
Despite the attempts to split 2/4 and 3/4 out of this patch, it's still doing two things at once. Namely, the optimization whereby recv_socket returns STATUS_ALERTED should not be part of this patch.
As I discussed in [1] it's not really clear to me that three server calls is significantly worse than two. It'd be nice to know that just removing the initial try_recv() call is really going to make programs worse if we're going to pursue it. I.e. personally I'd rather just leave it alone until we find a program that gets worse.
That aside, though, I'm not sure that adding a new select type is really the right way to go about this. One approach that occurs to me, which might end up being simpler, would be to return an apc_call_t, essentially as if the select request had been called immediately afterward. (In that case perhaps we should return STATUS_KERNEL_APC rather than STATUS_ALERTED).
[1] https://www.winehq.org/pipermail/wine-devel/2021-December/202825.html
On 1/19/22 02:08, Zebediah Figura (she/her) wrote:
On 1/18/22 13:30, Jinoh Kang wrote:
Otherwise, try_recv() call from sock_recv() may race against try_recv() call from async_recv_proc(), shuffling the packet order.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52401
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
 dlls/ntdll/unix/socket.c | 38 ++++++++++++++++++-------------------
 dlls/ws2_32/tests/sock.c |  8 ++++----
 server/protocol.def      |  4 ++--
 server/sock.c            | 41 ++++++++++++++++++++++++++++------------
 4 files changed, 54 insertions(+), 37 deletions(-)
Despite the attempts to split 2/4 and 3/4 out of this patch, it's still doing two things at once. Namely, the optimization whereby recv_socket returns STATUS_ALERTED should not be part of this patch.
As I discussed in [1] it's not really clear to me that three server calls is significantly worse than two. It'd be nice to know that just removing the initial try_recv() call is really going to make programs worse if we're going to pursue it.
GeForce NOW comes to mind, with network speed several times slower in Wine vs. Windows, although I didn't measure anything myself with it, nor do I have any proof that the major part of the slowdown comes from server round trips.
I was under the impression that the major socket performance compromise in network-heavy applications happens due to server calls during socket I/O. Are there reasons to think it might not be the case? If that is considered to be the likely case, the question is: if there is a planned change which is going to increase the number of server calls per sync socket operation from 2 to 3, should it be presumed ok and subject to being proven otherwise? Or should some metrics and reasoning suggesting that the added overhead is not significant be provided before making such a change? I am not sure if the issue addressed by these patches is a regression; if it is, that may give a motivation to fix it first and think about optimization next. Otherwise I was under the impression that (as usual for Wine patches?) it is up to the patch justification to make some argument that it doesn't regress performance considerably.
On 1/18/22 17:33, Paul Gofman wrote:
On 1/19/22 02:08, Zebediah Figura (she/her) wrote:
On 1/18/22 13:30, Jinoh Kang wrote:
Otherwise, try_recv() call from sock_recv() may race against try_recv() call from async_recv_proc(), shuffling the packet order.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52401
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
 dlls/ntdll/unix/socket.c | 38 ++++++++++++++++++-------------------
 dlls/ws2_32/tests/sock.c |  8 ++++----
 server/protocol.def      |  4 ++--
 server/sock.c            | 41 ++++++++++++++++++++++++++++------------
 4 files changed, 54 insertions(+), 37 deletions(-)
Despite the attempts to split 2/4 and 3/4 out of this patch, it's still doing two things at once. Namely, the optimization whereby recv_socket returns STATUS_ALERTED should not be part of this patch.
As I discussed in [1] it's not really clear to me that three server calls is significantly worse than two. It'd be nice to know that just removing the initial try_recv() call is really going to make programs worse if we're going to pursue it.
GeForce NOW comes to mind, with network speed several times slower in Wine vs. Windows, although I didn't measure anything myself with it, nor do I have any proof that the major part of the slowdown comes from server round trips.
I was under the impression that the major socket performance compromise in network-heavy applications happens due to server calls during socket I/O. Are there reasons to think it might not be the case?
No, I think it's likely to be the case.
If that is considered to be the likely case, the question is: if there is a planned change which is going to increase the number of server calls per sync socket operation from 2 to 3, should it be presumed ok and subject to being proven otherwise? Or should some metrics and reasoning suggesting that the added overhead is not significant be provided before making such a change? I am not sure if the issue addressed by these patches is a regression; if it is, that may give a motivation to fix it first and think about optimization next. Otherwise I was under the impression that (as usual for Wine patches?) it is up to the patch justification to make some argument that it doesn't regress performance considerably.
What concerns me is the prospect that once we do one server call we've already lost. It leads me to wonder if three server calls is perceptibly worse than two—and if it is, perhaps we should find a way to eliminate those two server calls instead.
It's quite likely that I'm biased from working with frame-time performance, though. It is probably very hard to not make any server calls at all when doing socket I/O. So depending on how ugly it gets to cut out the third server call it's probably not worth arguing against it.
On 1/19/22 08:48, Zebediah Figura (she/her) wrote:
If that is considered to be the likely case, the question is: if there is a planned change which is going to increase the number of server calls per sync socket operation from 2 to 3, should it be presumed ok and subject to being proven otherwise? Or should some metrics and reasoning suggesting that the added overhead is not significant be provided before making such a change? I am not sure if the issue addressed by these patches is a regression; if it is, that may give a motivation to fix it first and think about optimization next. Otherwise I was under the impression that (as usual for Wine patches?) it is up to the patch justification to make some argument that it doesn't regress performance considerably.
What concerns me is the prospect that once we do one server call we've already lost. It leads me to wonder if three server calls is perceptibly worse than two—and if it is, perhaps we should find a way to eliminate those two server calls instead.
We can eliminate wait_async() only if:
1. The file object does not have any associated completion ports.
2. The request doesn't have a completion APC.
3. The request doesn't have a completion event.
4. Either the object has FILE_SKIP_SET_EVENT_ON_HANDLE flag, or is already signaled.
5. The server-side async object doesn't have completion_callback assigned.
However, these conditions alone are not enough; additional measures have to be taken:
1. We need to make sure that listened FD events (poll flags) are set correctly at all times, before and after try_recv().
2. We need to implement some kind of opportunistic lock (not unlike RCU); later calls to receive_sock shall probe for any pending synchronous I/O on the same file object. Normally this would involve IPC, but we can short-circuit it if all I/O to the file object happens in a single thread.*
3. Some other cases I haven't thought of.
We can take a step further and remove the server calls entirely. This would however require some kind of shared memory to flag handles as async-safe. Such effort would apply generally to the entire Wine server-client architecture, though.
*In principle, it's impossible for a single thread to perform multiple synchronous I/O operations simultaneously. If Wine ever supports special user-mode APCs, we can simply block SIGUSR1 in the critical region.
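To make the conditions above concrete, here is a hypothetical sketch of the fast-path test they imply; the structure and field names are made up for illustration and do not correspond to actual wineserver or ntdll code:

struct async_completion_info
{
    int has_completion_port;      /* 1. file object has an associated completion port */
    int has_completion_apc;       /* 2. request carries a completion APC */
    int has_completion_event;     /* 3. request carries a completion event */
    int skip_set_event_on_handle; /* 4. FILE_SKIP_SET_EVENT_ON_HANDLE is set... */
    int already_signaled;         /*    ...or the object is already signaled */
    int has_completion_callback;  /* 5. server-side async has a completion_callback */
};

static int can_skip_wait_async( const struct async_completion_info *info )
{
    return !info->has_completion_port &&
           !info->has_completion_apc &&
           !info->has_completion_event &&
           (info->skip_set_event_on_handle || info->already_signaled) &&
           !info->has_completion_callback;
}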
It's quite likely that I'm biased from working with frame-time performance, though. It is probably very hard to not make any server calls at all when doing socket I/O. So depending on how ugly it gets to cut out the third server call it's probably not worth arguing against it.
If the server calls make up the majority of the overhead, then it's possible that each removed call will translate to a noticeable drop in I/O latency. Not that I have a concrete example, though.
On 1/19/22 08:33, Paul Gofman wrote:
On 1/19/22 02:08, Zebediah Figura (she/her) wrote:
On 1/18/22 13:30, Jinoh Kang wrote:
Otherwise, try_recv() call from sock_recv() may race against try_recv() call from async_recv_proc(), shuffling the packet order.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52401
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
 dlls/ntdll/unix/socket.c | 38 ++++++++++++++++++-------------------
 dlls/ws2_32/tests/sock.c |  8 ++++----
 server/protocol.def      |  4 ++--
 server/sock.c            | 41 ++++++++++++++++++++++++++++------------
 4 files changed, 54 insertions(+), 37 deletions(-)
Despite the attempts to split 2/4 and 3/4 out of this patch, it's still doing two things at once. Namely, the optimization whereby recv_socket returns STATUS_ALERTED should not be part of this patch.
As I discussed in [1] it's not really clear to me that three server calls is significantly worse than two. It'd be nice to know that just removing the initial try_recv() call is really going to make programs worse if we're going to pursue it.
GeForce NOW comes to mind, with network speed several times slower in Wine vs. Windows, although I didn't measure anything myself with it, nor do I have any proof that the major part of the slowdown comes from server round trips.
I was under the impression that the major socket performance compromise in network-heavy applications happens due to server calls during socket I/O. Are there reasons to think it might not be the case? If that is considered to be the likely case, the question is: if there is a planned change which is going to increase the number of server calls per sync socket operation from 2 to 3, should it be presumed ok and subject to being proven otherwise? Or should some metrics and reasoning suggesting that the added overhead is not significant be provided before making such a change?
I am not sure if the issue addressed by these patches is a regression; if it is, that may give a motivation to fix it first and think about optimization next.
This isn't a regression. My initial impression was that it was, but closer inspection revealed that the winsock->Afd transition merely preserved a bug already present in the old code rather than introducing a new one.
Now that the bug is in fact not a regression, I suppose I have a much weaker argument for getting this committed before the release. I'm okay either way.
Otherwise I was under the impression that (as usual for Wine patches?) it is up to the patch justification to make some argument that it doesn't regress performance considerably.
Yes, this was my intention.
On 1/19/22 08:08, Zebediah Figura (she/her) wrote:
On 1/18/22 13:30, Jinoh Kang wrote:
Otherwise, try_recv() call from sock_recv() may race against try_recv() call from async_recv_proc(), shuffling the packet order.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52401
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
 dlls/ntdll/unix/socket.c | 38 ++++++++++++++++++-------------------
 dlls/ws2_32/tests/sock.c |  8 ++++----
 server/protocol.def      |  4 ++--
 server/sock.c            | 41 ++++++++++++++++++++++++++++------------
 4 files changed, 54 insertions(+), 37 deletions(-)
Despite the attempts to split 2/4 and 3/4 out of this patch, it's still doing two things at once. Namely, the optimization whereby recv_socket returns STATUS_ALERTED should not be part of this patch.
I just realised that I can still keep try_recv() before recv_socket even after removing the status field from struct recv_socket_request. I have to admit that this wasn't obvious to me the first time.
I'll split the patch accordingly. Thanks for the remark!
As I discussed in [1] it's not really clear to me that three server calls is significantly worse than two.
I didn't really see the need for a new server request when we could reuse an existing one. Also, it doesn't harm readability that much.
It'd be nice to know that just removing the initial try_recv() call is really going to make programs worse if we're going to pursue it.
Actually I have observed that simply removing try_recv() altogether from sock_recv() suffers from another set of problems (that make the test fail):
1. It makes recv() on nonblocking socket always fail with WSAEWOULDBLOCK. 2. It turns all synchronous failures into asynchronous failures. Normally, a synchronous failure shall not fire up the APC or post to the completion port at all.
Thus, a call to try_recv() still has to be made before the async request enters pending mode, regardless of whether this call is done before receive_sock or not.
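A sketch of the kind of Winsock behaviour point 1 refers to, written in the style of the ws2_32 tests but not an actual test from sock.c (it assumes client and server are an already connected TCP pair): with data already queued, recv() on a non-blocking socket must complete synchronously instead of failing with WSAEWOULDBLOCK, which is only possible if something performs the read before the async is left pending.

    u_long nonblock = 1;
    char buf[16];
    int ret;

    ok(!ioctlsocket( client, FIONBIO, &nonblock ), "failed to set non-blocking mode\n");

    ret = send( server, "data", 4, 0 );
    ok(ret == 4, "got %d\n", ret);
    Sleep( 100 );  /* give the data time to arrive */

    /* must return the queued data synchronously, not fail with WSAEWOULDBLOCK */
    ret = recv( client, buf, sizeof(buf), 0 );
    ok(ret == 4, "got %d, error %u\n", ret, WSAGetLastError());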
My previous approach was to do this "initial" try_recv() call via APC_ASYNC_IO (i.e. inside async_recv_proc), thereby making the call just like any other asynchronous try_recv() call. Eventually I could get to the point where all the unit tests pass, but the end result looked a bit messy (especially with the special case handling inside invoke_system_apc, and extra arrangements that are needed inside receive_sock).
That said, I do need to write more test cases for synchronous vs. asynchronous failure, specifically for STATUS_BUFFER_OVERFLOW.
I.e. personally I'd rather just leave it alone until we find a program that gets worse.
That aside, though, I'm not sure that adding a new select type is really the right way to go about this. One approach that occurs to me, which might end up being simpler, would be to return an apc_call_t, essentially as if the select request had been called immediately afterward. (In that case perhaps we should return STATUS_KERNEL_APC rather than STATUS_ALERTED).
This is exactly the approach I have initially had in mind, and is described in https://bugs.winehq.org/show_bug.cgi?id=52401#c4. Pasting it here:
- The client issues a 'recv_socket' call.
- The server determines if read_q is empty *and* the request can be served immediately.
- If this is the case, the server does the following:
- The server marks the current thread as being in system APC wait.
- The server bypasses sock->read_q and calls async_terminate() directly. (Alternatively, the server first puts the async in read_q and later calls async_wake_up( &sock->read_q ).)
- The server dequeues the next system APC (likely APC_ASYNC_IO from 3.2) from the queue.
- The server inserts the APC into the 'recv_socket' reply.
Ideally we could make it so that every Wineserver call has a flag that indicates whether it could process the original request *and* retrieve the next system APC from the queue in one go. This will eliminate the extra SIGUSR1 round trip in other cases as well.
(Note: 3.2 is slightly wrong, we should always go through sock->read_q no matter what.)
As you can see this inevitably winds up tweaking the select mechanism anyway. This approach has some open questions:
How shall we return the results for our new pseudo-APC call? Using prev_apc in the select request, or a separate server call?
2. Shall our pseudo-APC go through the system_apc queue? If so, what if there is another kernel APC that is already queued?
3. Shall we reuse invoke_system_apc() for this apc_call_t response?
4. Shall we reuse queue_apc() to generate the apc_call_t? If so, what shall be done with the gratuitous SIGUSR1 delivery? Enter uninterrupted region or explicitly block SIGUSR1 while calling receive_sock?
Also, as I've discussed earlier, the initial synchronous try_recv() call has subtly different semantics with respect to error handling compared to subsequent asynchronous counterparts, which does make existing code more complicated.
[1] https://www.winehq.org/pipermail/wine-devel/2021-December/202825.html
On 1/19/22 11:30, Jinoh Kang wrote:
As you can see this inevitably winds up tweaking the select mechanism anyway. This approach has some open questions:
How shall we return the results for our new pseudo-APC call? Using prev_apc in the select request, or a separate server call?
Shall our pseudo-APC go through the system_apc queue? If so, what if there is another kernel APC that is already queued?
Shall we reuse invoke_system_apc() for this apc_call_t response?
Shall we reuse queue_apc() to generate the apc_call_t? If so, what shall be done with the gratuitous SIGUSR1 delivery? Enter uninterrupted region or explicitly block SIGUSR1 while calling receive_sock?
Further remarks:
- If we lean toward "don't reuse anything," we're left with a bunch of APC bureaucracy with no apparent merit.
- If we lean toward "reuse as much as possible," we would be left with some nontrivial refactoring work to do regarding select calls (e.g. injecting prev_apc into server_select/server_wait, special-casing the "synchronous APC").
On 1/18/22 20:30, Jinoh Kang wrote:
Actually I have observed that simply removing try_recv() altogether from sock_recv() suffers from another set of problems (that make the test fail):
- It makes recv() on nonblocking socket always fail with WSAEWOULDBLOCK.
- It turns all synchronous failures into asynchronous failures. Normally, a synchronous failure shall not fire up the APC or post to the completion port at all.
Thus, a call to try_recv() still has to be made before the async request enters pending mode, regardless of whether this call is done before receive_sock or not.
My previous approach was to do this "initial" try_recv() call via APC_ASYNC_IO (i.e. inside async_recv_proc), thereby making the call just like any other asynchronous try_recv() call. Eventually I could get to the point where all the unit tests pass, but the end result looked a bit messy (especially with the special case handling inside invoke_system_apc, and extra arrangements that are needed inside receive_sock).
Okay, that's fair enough.
That aside, though, I'm not sure that adding a new select type is really the right way to go about this. One approach that occurs to me, which might end up being simpler, would be to return an apc_call_t, essentially as if the select request had been called immediately afterward. (In that case perhaps we should return STATUS_KERNEL_APC rather than STATUS_ALERTED).
This is exactly the approach I have initially had in mind, and is described in https://bugs.winehq.org/show_bug.cgi?id=52401#c4. Pasting it here:
- The client issues a 'recv_socket' call.
- The server determines if read_q is empty *and* the request can be served immediately.
- If this is the case, the server does the following:
- The server marks the current thread as being in system APC wait.
- The server bypasses sock->read_q and calls async_terminate() directly. (Alternatively, the server first puts the async in read_q and later calls async_wake_up( &sock->read_q ).)
- The server dequeues the next system APC (likely APC_ASYNC_IO from 3.2) from the queue.
- The server inserts the APC into the 'recv_socket' reply.
Ideally we could make it so that every Wineserver call has a flag that indicates whether it could process the original request *and* retrieve the next system APC from the queue in one go. This will eliminate the extra SIGUSR1 round trip in other cases as well.
(Note: 3.2 is slightly wrong, we should always go through sock->read_q no matter what.)
As you can see this inevitably winds up tweaking the select mechanism anyway. This approach has some open questions:
- How shall we return the results for our new pseudo-APC call? Using prev_apc in the select request, or a separate server call?
Using select seems reasonable to me. I don't think it would require much change in ntdll [an extra argument to server_select(), probably], and none at all in the server.
- Shall our pseudo-APC go through the system_apc queue? If so, what if there is another kernel APC that is already queued?
Yes, the point is to use APC_ASYNC_IO itself, so we just reuse that entire code.
I would imagine we should return the first APC in the queue. In the normal case that should be the APC we just queued; if not we were going to receive SIGUSR1 anyway.
- Shall we reuse invoke_system_apc() for this apc_call_t response?
Yes, of course.
- Shall we reuse queue_apc() to generate the apc_call_t? If so, what shall be done with the gratuitous SIGUSR1 delivery? Enter uninterrupted region or explicitly block SIGUSR1 while calling receive_sock?
I think we should avoid sending SIGUSR1 in the first place. We'd need some way of communicating to queue_apc() or even is_in_apc_wait() that we are going to try to return the APC synchronously.
Also, as I've discussed earlier, the initial synchronous try_recv() call has subtly different semantics with respect to error handling compared to subsequent asynchronous counterparts, which does make existing code more complicated.
Sure. I don't think using APC_ASYNC_IO should complicate any of that, although to be sure I haven't tried it.
On 1/19/22 08:08, Zebediah Figura (she/her) wrote:
That aside, though, I'm not sure that adding a new select type is really the right way to go about this.
If we are not going with SIGNAL_WAIT_ASYNC, there's an alternate approach (avoiding three server round trips):
1. Add a new wineserver request that calls async_set_result(), as in [1].
2. Re-implement async's add_queue to switch to pending mode before calling the original add_queue.
In this approach, Unix side will handle synchronous I/O operation as follows:
1. If the operation is completed, we request wineserver to call async_set_result() directly, and don't wait *at all*.
2. If the operation needs to be queued (STATUS_PENDING), we do async_wait() as before. This will "magically" switch the async to STATUS_PENDING and continue polling.
Note "magically" -- we're injecting impure semantics into async's pre-wait stage. I don't think this would be a significant problem, since the "async" object type is internal to Wine anyway.
One approach that occurs to me, which might end up being simpler,
Reusing the APC machinery while keeping things simple turned out to be no easy task. Perhaps I could just make the other [2] patch series even simpler, but I'm stuck here...
would be to return an apc_call_t, essentially as if the select request had been called immediately afterward. (In that case perhaps we should return STATUS_KERNEL_APC rather than STATUS_ALERTED).
[1] https://www.winehq.org/pipermail/wine-devel/2021-December/202825.html
[2] https://www.winehq.org/pipermail/wine-devel/2022-January/205168.html
Please excuse the incorrect subjects/commit messages...
This patch set is basically an RFC at this stage, though.
Thank you for your understanding.