The key change is to never return STATUS_TIMEOUT, and to instead return the result of NtYieldExecution() if zero timeout was passed, or STATUS_SUCCESS otherwise.
An overview of the correct values for each combination, copied from the test commit: - Non-alertable, zero timeout: STATUS_SUCCESS or STATUS_NO_YIELD_PERFORMED - Non-alertable, non-zero timeout: STATUS_SUCCESS - Alertable, zero timeout: STATUS_SUCCESS, STATUS_NO_YIELD_PERFORMED, or STATUS_USER_APC - Alertable, non-zero timeout: STATUS_SUCCESS or STATUS_USER_APC
-- v3: ntdll: Fix the return value of NtDelayExecution. ntdll/tests: Add tests for NtDelayExecution and Sleep(Ex).
From: William Horvath william@horvath.blog
--- dlls/ntdll/tests/sync.c | 138 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+)
diff --git a/dlls/ntdll/tests/sync.c b/dlls/ntdll/tests/sync.c index 3ab89bbf361..77ec2792c13 100644 --- a/dlls/ntdll/tests/sync.c +++ b/dlls/ntdll/tests/sync.c @@ -32,6 +32,7 @@ static NTSTATUS (WINAPI *pNtCreateEvent) ( PHANDLE, ACCESS_MASK, const OBJECT_AT static NTSTATUS (WINAPI *pNtCreateKeyedEvent)( HANDLE *, ACCESS_MASK, const OBJECT_ATTRIBUTES *, ULONG ); static NTSTATUS (WINAPI *pNtCreateMutant)( HANDLE *, ACCESS_MASK, const OBJECT_ATTRIBUTES *, BOOLEAN ); static NTSTATUS (WINAPI *pNtCreateSemaphore)( HANDLE *, ACCESS_MASK, const OBJECT_ATTRIBUTES *, LONG, LONG ); +static NTSTATUS (WINAPI *pNtDelayExecution)( BOOLEAN, const LARGE_INTEGER * ); static NTSTATUS (WINAPI *pNtOpenEvent)( HANDLE *, ACCESS_MASK, const OBJECT_ATTRIBUTES * ); static NTSTATUS (WINAPI *pNtOpenKeyedEvent)( HANDLE *, ACCESS_MASK, const OBJECT_ATTRIBUTES * ); static NTSTATUS (WINAPI *pNtPulseEvent)( HANDLE, LONG * ); @@ -1030,6 +1031,141 @@ static void test_completion_port_scheduling(void) } }
+/* An overview of possible combinations and return values: + * - Non-alertable, zero timeout: STATUS_SUCCESS or STATUS_NO_YIELD_PERFORMED + * - Non-alertable, non-zero timeout: STATUS_SUCCESS + * - Alertable, zero timeout: STATUS_SUCCESS, STATUS_NO_YIELD_PERFORMED, or STATUS_USER_APC + * - Alertable, non-zero timeout: STATUS_SUCCESS or STATUS_USER_APC + * - Sleep/SleepEx don't modify LastError, no matter what + */ + +static VOID CALLBACK apc_proc( ULONG_PTR param ) +{ + InterlockedIncrement( (LONG *)param ); +} + +struct delay_param +{ + BOOLEAN alertable; + LONGLONG timeout; + BOOLEAN queue_apc; + const char *desc; +}; + +static const struct delay_param tests[] = +{ + /* "yield" means zero-timeout delay */ + /* non-alertable tests */ + { FALSE, 0, FALSE, "non-alertable yield" }, + { FALSE, -10000, FALSE, "non-alertable sleep" }, + /* alertable tests without APCs */ + { TRUE, 0, FALSE, "alertable yield" }, + { TRUE, -10000, FALSE, "alertable sleep" }, + /* alertable tests with APCs */ + { TRUE, 0, TRUE, "alertable yield with APC" }, + { TRUE, -10000, TRUE, "alertable sleep with APC" }, +}; + +static void test_delayexecution(void) +{ + unsigned int i, j; + unsigned int noyields[2] = {0, 0}; /* [0] = non-alertable, [1] = alertable */ + unsigned int apc_status_count[2] = {0, 0}; /* [0] = zero timeout, [1] = non-zero timeout */ + LARGE_INTEGER timeout; + ULONG apc_count; + NTSTATUS status; + DWORD ret; + + for (i = 0; i < 50; i++) + { + for (j = 0; j < ARRAY_SIZE(tests); j++) + { + const struct delay_param *test = &tests[j]; + timeout.QuadPart = test->timeout; + apc_count = 0; + + if (test->queue_apc) + { + int k; + for (k = 0; k < 8; k++) + ok( QueueUserAPC( apc_proc, GetCurrentThread(), (ULONG_PTR)&apc_count ), + "%s: failed to queue APC %d.\n", test->desc, k ); + } + + /* test NtDelayExecution */ + SetLastError( 0xdeadbeef ); + status = pNtDelayExecution( test->alertable, &timeout ); + ok( GetLastError() == 0xdeadbeef, "%s: LastError unexpectedly changed to 
%lu.\n", test->desc, GetLastError() ); + + /* test yields */ + if (!test->timeout) + { + todo_wine_if(status == STATUS_TIMEOUT) + ok( status == STATUS_SUCCESS || status == STATUS_NO_YIELD_PERFORMED || + (test->alertable && test->queue_apc && status == STATUS_USER_APC), + "%s: got %#lx.\n", test->desc, status ); + + if (status == STATUS_NO_YIELD_PERFORMED) + noyields[test->alertable ? 1 : 0]++; + } + /* test delays */ + else + { + todo_wine_if(status == STATUS_TIMEOUT) + ok( status == STATUS_SUCCESS || + (test->alertable && test->queue_apc && status == STATUS_USER_APC), + "%s: got %#lx.\n", test->desc, status ); + } + + /* make sure we only get APCs when we expect them */ + if (test->alertable && test->queue_apc) + { + if (status == STATUS_USER_APC) + apc_status_count[test->timeout ? 1 : 0]++; + ok( apc_count > 0, "%s: no APCs executed.\n", test->desc ); + } + else + ok( !apc_count, "%s: unexpected APCs executed %ld.\n", test->desc, apc_count ); + + /* test SleepEx */ + apc_count = 0; + if (test->queue_apc) + { + int k; + for (k = 0; k < 8; k++) + ok( QueueUserAPC( apc_proc, GetCurrentThread(), (ULONG_PTR)&apc_count ), + "%s: failed to queue APC %d.\n", test->desc, k ); + } + + SetLastError( 0xdeadbeef ); + ret = SleepEx( timeout.QuadPart ? (-timeout.QuadPart / 10000) : 0, test->alertable ); + ok( GetLastError() == 0xdeadbeef, "%s: LastError unexpectedly changed to %lu.\n", test->desc, GetLastError() ); + + if (!test->alertable) + ok( !ret, "%s: SleepEx returned %lu.\n", test->desc, ret ); + else + ok( !ret || ret == WAIT_IO_COMPLETION, + "%s: SleepEx returned %lu.\n", test->desc, ret ); + + if (test->queue_apc) + ok( apc_count > 0, "%s: no APCs executed.\n", test->desc ); + + /* test Sleep (non-alertable only) */ + if (!test->alertable) + { + SetLastError( 0xdeadbeef ); + Sleep( timeout.QuadPart ? 
(-timeout.QuadPart / 10000) : 0 ); + ok( GetLastError() == 0xdeadbeef, "%s: LastError unexpectedly changed to %lu.\n", test->desc, GetLastError() ); + } + } + } + + todo_wine ok( noyields[0] > 0, "no STATUS_NO_YIELD_PERFORMED results for non-alertable yields.\n" ); + todo_wine ok( noyields[1] > 0, "no STATUS_NO_YIELD_PERFORMED results for alertable yields.\n" ); + ok( apc_status_count[0] > 0, "no STATUS_USER_APC results for alertable yields.\n" ); + ok( apc_status_count[1] > 0, "no STATUS_USER_APC results for alertable delays.\n" ); +} + START_TEST(sync) { HMODULE module = GetModuleHandleA("ntdll.dll"); @@ -1046,6 +1182,7 @@ START_TEST(sync) pNtCreateKeyedEvent = (void *)GetProcAddress(module, "NtCreateKeyedEvent"); pNtCreateMutant = (void *)GetProcAddress(module, "NtCreateMutant"); pNtCreateSemaphore = (void *)GetProcAddress(module, "NtCreateSemaphore"); + pNtDelayExecution = (void *)GetProcAddress(module, "NtDelayExecution"); pNtOpenEvent = (void *)GetProcAddress(module, "NtOpenEvent"); pNtOpenKeyedEvent = (void *)GetProcAddress(module, "NtOpenKeyedEvent"); pNtPulseEvent = (void *)GetProcAddress(module, "NtPulseEvent"); @@ -1078,4 +1215,5 @@ START_TEST(sync) test_resource(); test_tid_alert( argv ); test_completion_port_scheduling(); + test_delayexecution(); }
From: William Horvath william@horvath.blog
The key change is to never return STATUS_TIMEOUT, and to instead return the result of NtYieldExecution() if zero timeout was passed, or STATUS_SUCCESS otherwise.
An overview of the correct values for each combination, copied from the test commit: - Non-alertable, zero timeout: STATUS_SUCCESS or STATUS_NO_YIELD_PERFORMED - Non-alertable, non-zero timeout: STATUS_SUCCESS - Alertable, zero timeout: STATUS_SUCCESS, STATUS_NO_YIELD_PERFORMED, or STATUS_USER_APC - Alertable, non-zero timeout: STATUS_SUCCESS or STATUS_USER_APC - Sleep/SleepEx don't modify LastError, no matter what --- dlls/ntdll/tests/sync.c | 6 ++---- dlls/ntdll/unix/server.c | 10 +++++++--- dlls/ntdll/unix/sync.c | 22 +++++++++++++++------- dlls/ntdll/unix/unix_private.h | 2 +- 4 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/dlls/ntdll/tests/sync.c b/dlls/ntdll/tests/sync.c index 77ec2792c13..cdfec5c51d6 100644 --- a/dlls/ntdll/tests/sync.c +++ b/dlls/ntdll/tests/sync.c @@ -1100,7 +1100,6 @@ static void test_delayexecution(void) /* test yields */ if (!test->timeout) { - todo_wine_if(status == STATUS_TIMEOUT) ok( status == STATUS_SUCCESS || status == STATUS_NO_YIELD_PERFORMED || (test->alertable && test->queue_apc && status == STATUS_USER_APC), "%s: got %#lx.\n", test->desc, status ); @@ -1111,7 +1110,6 @@ static void test_delayexecution(void) /* test delays */ else { - todo_wine_if(status == STATUS_TIMEOUT) ok( status == STATUS_SUCCESS || (test->alertable && test->queue_apc && status == STATUS_USER_APC), "%s: got %#lx.\n", test->desc, status ); @@ -1160,8 +1158,8 @@ static void test_delayexecution(void) } }
- todo_wine ok( noyields[0] > 0, "no STATUS_NO_YIELD_PERFORMED results for non-alertable yields.\n" ); - todo_wine ok( noyields[1] > 0, "no STATUS_NO_YIELD_PERFORMED results for alertable yields.\n" ); + ok( noyields[0] > 0, "no STATUS_NO_YIELD_PERFORMED results for non-alertable yields.\n" ); + ok( noyields[1] > 0, "no STATUS_NO_YIELD_PERFORMED results for alertable yields.\n" ); ok( apc_status_count[0] > 0, "no STATUS_USER_APC results for alertable yields.\n" ); ok( apc_status_count[1] > 0, "no STATUS_USER_APC results for alertable delays.\n" ); } diff --git a/dlls/ntdll/unix/server.c b/dlls/ntdll/unix/server.c index b40e7c2a1d7..0cc236ce21f 100644 --- a/dlls/ntdll/unix/server.c +++ b/dlls/ntdll/unix/server.c @@ -764,10 +764,10 @@ unsigned int server_select( const union select_op *select_op, data_size_t size, * server_wait */ unsigned int server_wait( const union select_op *select_op, data_size_t size, UINT flags, - const LARGE_INTEGER *timeout ) + const LARGE_INTEGER *timeout, unsigned int *yield_result ) { timeout_t abs_timeout = timeout ? timeout->QuadPart : TIMEOUT_INFINITE; - unsigned int ret; + unsigned int ret, status; struct user_apc apc;
if (abs_timeout < 0) @@ -784,7 +784,11 @@ unsigned int server_wait( const union select_op *select_op, data_size_t size, UI /* A test on Windows 2000 shows that Windows always yields during a wait, but a wait that is hit by an event gets a priority boost as well. This seems to model that behavior the closest. */ - if (ret == STATUS_TIMEOUT) NtYieldExecution(); + if (ret == STATUS_TIMEOUT) + { + status = NtYieldExecution(); + if (yield_result) *yield_result = status; + } return ret; }
diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c index 0ee16b360b3..5401b76d12a 100644 --- a/dlls/ntdll/unix/sync.c +++ b/dlls/ntdll/unix/sync.c @@ -1580,7 +1580,7 @@ NTSTATUS WINAPI NtWaitForMultipleObjects( DWORD count, const HANDLE *handles, BO if (alertable) flags |= SELECT_ALERTABLE; select_op.wait.op = wait_any ? SELECT_WAIT : SELECT_WAIT_ALL; for (i = 0; i < count; i++) select_op.wait.handles[i] = wine_server_obj_handle( handles[i] ); - return server_wait( &select_op, offsetof( union select_op, wait.handles[count] ), flags, timeout ); + return server_wait( &select_op, offsetof( union select_op, wait.handles[count] ), flags, timeout, NULL ); }
@@ -1608,7 +1608,7 @@ NTSTATUS WINAPI NtSignalAndWaitForSingleObject( HANDLE signal, HANDLE wait, select_op.signal_and_wait.op = SELECT_SIGNAL_AND_WAIT; select_op.signal_and_wait.wait = wine_server_obj_handle( wait ); select_op.signal_and_wait.signal = wine_server_obj_handle( signal ); - return server_wait( &select_op, sizeof(select_op.signal_and_wait), flags, timeout ); + return server_wait( &select_op, sizeof(select_op.signal_and_wait), flags, timeout, NULL ); }
@@ -1641,8 +1641,16 @@ NTSTATUS WINAPI NtYieldExecution(void) */ NTSTATUS WINAPI NtDelayExecution( BOOLEAN alertable, const LARGE_INTEGER *timeout ) { + unsigned int yield_result = STATUS_SUCCESS; + /* if alertable, we need to query the server */ - if (alertable) return server_wait( NULL, 0, SELECT_INTERRUPTIBLE | SELECT_ALERTABLE, timeout ); + if (alertable) + { + server_wait( NULL, 0, SELECT_INTERRUPTIBLE | SELECT_ALERTABLE, timeout, &yield_result ); + /* we only care about the return value of the yield for zero timeouts */ + if (timeout && !timeout->QuadPart) return yield_result; + return STATUS_SUCCESS; + }
if (!timeout || timeout->QuadPart == TIMEOUT_INFINITE) /* sleep forever */ { @@ -1660,8 +1668,8 @@ NTSTATUS WINAPI NtDelayExecution( BOOLEAN alertable, const LARGE_INTEGER *timeou }
/* Note that we yield after establishing the desired timeout */ - NtYieldExecution(); - if (!when) return STATUS_SUCCESS; + yield_result = NtYieldExecution(); + if (!when) return yield_result;
for (;;) { @@ -1892,7 +1900,7 @@ NTSTATUS WINAPI NtWaitForKeyedEvent( HANDLE handle, const void *key, select_op.keyed_event.op = SELECT_KEYED_EVENT_WAIT; select_op.keyed_event.handle = wine_server_obj_handle( handle ); select_op.keyed_event.key = wine_server_client_ptr( key ); - return server_wait( &select_op, sizeof(select_op.keyed_event), flags, timeout ); + return server_wait( &select_op, sizeof(select_op.keyed_event), flags, timeout, NULL ); }
@@ -1911,7 +1919,7 @@ NTSTATUS WINAPI NtReleaseKeyedEvent( HANDLE handle, const void *key, select_op.keyed_event.op = SELECT_KEYED_EVENT_RELEASE; select_op.keyed_event.handle = wine_server_obj_handle( handle ); select_op.keyed_event.key = wine_server_client_ptr( key ); - return server_wait( &select_op, sizeof(select_op.keyed_event), flags, timeout ); + return server_wait( &select_op, sizeof(select_op.keyed_event), flags, timeout, NULL ); }
diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index f840045f841..b93b37b003d 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -211,7 +211,7 @@ extern void server_leave_uninterrupted_section( pthread_mutex_t *mutex, sigset_t extern unsigned int server_select( const union select_op *select_op, data_size_t size, UINT flags, timeout_t abs_timeout, struct context_data *context, struct user_apc *user_apc ); extern unsigned int server_wait( const union select_op *select_op, data_size_t size, UINT flags, - const LARGE_INTEGER *timeout ); + const LARGE_INTEGER *timeout, unsigned int *yield_result ); extern unsigned int server_queue_process_apc( HANDLE process, const union apc_call *call, union apc_result *result ); extern int server_get_unix_fd( HANDLE handle, unsigned int wanted_access, int *unix_fd,
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=151053
Your paranoid android.
=== debian11 (32 bit report) ===
Report validation errors: ntdll:file crashed (c0000028)
=== debian11b (64 bit WoW report) ===
user32: input.c:4306: Test succeeded inside todo block: button_down_hwnd_todo 1: got MSG_TEST_WIN hwnd 00000000012D00F0, msg WM_LBUTTONDOWN, wparam 0x1, lparam 0x320032
Report validation errors: kernel32:file crashed (80000101) ntdll:file crashed (80000101)
v3: - Refactor to use a single thread for tests. - Change `NTSTATUS` to `unsigned int` for the yield return param. - Don't use a ternary operator to check whether to return the yield result.
On Sat Jan 25 03:26:51 2025 +0000, William Horvath wrote:
changed this line in [version 3 of the diff](/wine/wine/-/merge_requests/7169/diffs?diff_id=154065&start_sha=3ced0c675dc4615bc7814c86061710ad56e9475b#78064f886cf71bafdbe5ff1e7794d63c4bdda9bb_1248_1161)
I thought I had a better reason for it, but you're right, it's actually kind of tangential to testing the relevant behavior here. I removed that and generally tried to simplify the tests to make the execution flow clearer.
On Sat Jan 25 03:26:51 2025 +0000, William Horvath wrote:
changed this line in [version 3 of the diff](/wine/wine/-/merge_requests/7169/diffs?diff_id=154065&start_sha=3ced0c675dc4615bc7814c86061710ad56e9475b#78064f886cf71bafdbe5ff1e7794d63c4bdda9bb_1153_1142)
No, that wasn't necessary. I believe it's clearer now.
Paul Gofman (@gofman) commented about dlls/ntdll/tests/sync.c:
- */
+static VOID CALLBACK apc_proc( ULONG_PTR param ) +{
- InterlockedIncrement( (LONG *)param );
+}
+struct delay_param +{
- BOOLEAN alertable;
- LONGLONG timeout;
- BOOLEAN queue_apc;
- const char *desc;
+};
+static const struct delay_param tests[] =
We usually define such static arrays and related structure definitions inside the test itself unless they are shared with some other test functions. Also, we usually omit a separate test-parameters declaration and declare it just as: struct { BOOLEAN alertable; LONGLONG timeout; BOOLEAN queue_apc; const char *desc; } tests[] = { { ... }, };
You probably don't need ```const struct delay_param *test = &tests[j];``` below, tests[j] is almost same length.
Paul Gofman (@gofman) commented about dlls/ntdll/tests/sync.c:
/* test NtDelayExecution */
SetLastError( 0xdeadbeef );
status = pNtDelayExecution( test->alertable, &timeout );
ok( GetLastError() == 0xdeadbeef, "%s: LastError unexpectedly changed to %lu.\n", test->desc, GetLastError() );
/* test yields */
if (!test->timeout)
{
todo_wine_if(status == STATUS_TIMEOUT)
ok( status == STATUS_SUCCESS || status == STATUS_NO_YIELD_PERFORMED ||
(test->alertable && test->queue_apc && status == STATUS_USER_APC),
"%s: got %#lx.\n", test->desc, status );
if (status == STATUS_NO_YIELD_PERFORMED)
noyields[test->alertable ? 1 : 0]++;
Just ```noyields[test->alertable]++```?
Paul Gofman (@gofman) commented about dlls/ntdll/tests/sync.c:
/* test yields */
if (!test->timeout)
{
todo_wine_if(status == STATUS_TIMEOUT)
ok( status == STATUS_SUCCESS || status == STATUS_NO_YIELD_PERFORMED ||
(test->alertable && test->queue_apc && status == STATUS_USER_APC),
"%s: got %#lx.\n", test->desc, status );
if (status == STATUS_NO_YIELD_PERFORMED)
noyields[test->alertable ? 1 : 0]++;
}
/* test delays */
else
{
todo_wine_if(status == STATUS_TIMEOUT)
ok( status == STATUS_SUCCESS ||
The checks in the else branch look very close to those in the if? In this case it's probably worth removing the if, only conditionally incrementing noyields, and coalescing the ok() condition into a single expression?
Paul Gofman (@gofman) commented about dlls/ntdll/tests/sync.c:
- DWORD ret;
- for (i = 0; i < 50; i++)
- {
for (j = 0; j < ARRAY_SIZE(tests); j++)
{
const struct delay_param *test = &tests[j];
timeout.QuadPart = test->timeout;
apc_count = 0;
if (test->queue_apc)
{
int k;
for (k = 0; k < 8; k++)
ok( QueueUserAPC( apc_proc, GetCurrentThread(), (ULONG_PTR)&apc_count ),
"%s: failed to queue APC %d.\n", test->desc, k );
We now use winetest_push_context / winetest_pop_context to avoid adding test id string into every ok().
Paul Gofman (@gofman) commented about dlls/ntdll/tests/sync.c:
{
int k;
for (k = 0; k < 8; k++)
ok( QueueUserAPC( apc_proc, GetCurrentThread(), (ULONG_PTR)&apc_count ),
"%s: failed to queue APC %d.\n", test->desc, k );
}
SetLastError( 0xdeadbeef );
ret = SleepEx( timeout.QuadPart ? (-timeout.QuadPart / 10000) : 0, test->alertable );
ok( GetLastError() == 0xdeadbeef, "%s: LastError unexpectedly changed to %lu.\n", test->desc, GetLastError() );
if (!test->alertable)
ok( !ret, "%s: SleepEx returned %lu.\n", test->desc, ret );
else
ok( !ret || ret == WAIT_IO_COMPLETION,
"%s: SleepEx returned %lu.\n", test->desc, ret );
like above, ``` ok( !ret || (test->alertable && ret == WAIT_IO_COMPLETION), "got %lu.\n", ret ); ``` is probably cleaner.
Paul Gofman (@gofman) commented about dlls/ntdll/tests/sync.c:
- LARGE_INTEGER timeout;
- ULONG apc_count;
- NTSTATUS status;
- DWORD ret;
- for (i = 0; i < 50; i++)
- {
for (j = 0; j < ARRAY_SIZE(tests); j++)
{
const struct delay_param *test = &tests[j];
timeout.QuadPart = test->timeout;
apc_count = 0;
if (test->queue_apc)
{
int k;
k may probably better go to ```unsigned int i, j``` at the top.
Paul Gofman (@gofman) commented about dlls/ntdll/tests/sync.c:
- NTSTATUS status;
- DWORD ret;
- for (i = 0; i < 50; i++)
- {
for (j = 0; j < ARRAY_SIZE(tests); j++)
{
const struct delay_param *test = &tests[j];
timeout.QuadPart = test->timeout;
apc_count = 0;
if (test->queue_apc)
{
int k;
for (k = 0; k < 8; k++)
ok( QueueUserAPC( apc_proc, GetCurrentThread(), (ULONG_PTR)&apc_count ),
It is better not to call the tested functions inside ok(). In this case I'm not sure the ok() is needed here; it is very unlikely to fail, and if it does the test will fail anyway?
Paul Gofman (@gofman) commented about dlls/ntdll/tests/sync.c:
/* make sure we only get APCs when we expect them */
if (test->alertable && test->queue_apc)
{
if (status == STATUS_USER_APC)
apc_status_count[test->timeout ? 1 : 0]++;
ok( apc_count > 0, "%s: no APCs executed.\n", test->desc );
}
else
ok( !apc_count, "%s: unexpected APCs executed %ld.\n", test->desc, apc_count );
/* test SleepEx */
apc_count = 0;
if (test->queue_apc)
{
int k;
for (k = 0; k < 8; k++)
Same, 'int k' and queuing APC in ok().
Other than that, I am not seeing issues with this.
On Sat Jan 25 04:04:21 2025 +0000, Paul Gofman wrote:
k may probably better go to ```unsigned int i, j``` at the top.
I think we only have to queue a single APC here for each test anyways, I think I can just remove that loop.
On Sat Jan 25 04:04:21 2025 +0000, Paul Gofman wrote:
it is better not to call the tested functions inside ok(). In this case not sure ok() is needed here, it is very unlikely to fail and if it will the test will fail anyway?
Same as the comment above, the multiple APCs probably aren't necessary to test the correct behavior, I'll remove it.
The only thing... It doesn't seem like the NtYieldExecution after the server call makes much sense? If we waited, no one can ever tell whether we yielded or not. There could in theory be some difference for a zero timeout, but it is a server request anyway; it yielded enough while waiting for the server reply. So I suggest just removing the new return value from server_wait() and only caring about the yield and its status in NtDelayExecution()'s own yield with zero timeout (and always returning the 'yielded' result on the alertable / server path).
Then, the test actually looks inherently flaky; it is not a given that it should ever yield in 50 attempts if the machine is not loaded. Maybe it's worth putting the checks that depend on that inside 'if (0)' and adding a comment?
On Sat Jan 25 04:28:32 2025 +0000, Paul Gofman wrote:
The only thing... It doesn't seem like the NtYieldExecution after the server call makes much sense? If we waited, no one can ever tell whether we yielded or not. There could in theory be some difference for a zero timeout, but it is a server request anyway; it yielded enough while waiting for the server reply. So I suggest just removing the new return value from server_wait() and only caring about the yield and its status in NtDelayExecution()'s own yield with zero timeout (and always returning the 'yielded' result on the alertable / server path). Then, the test actually looks inherently flaky; it is not a given that it should ever yield in 50 attempts if the machine is not loaded. Maybe it's worth putting the checks that depend on that inside 'if (0)' and adding a comment?
About flakiness: I could up the iteration count for yields (0 timeout waits) drastically without it increasing the total test time much. In practice, I've never seen this check fail on my fast machine under no load, and it's probably even less likely to fail on the comparatively sluggish test VM, but it wouldn't hurt.
About the yield after the server call: we only yield if we timed out, which I guess is what's supposed to emulate the "if it's hit by an event then it gets a priority boost" behavior, as "hit by an event" means that we didn't get `STATUS_TIMEOUT`. It's a bit backwards in that we're instead giving the other events a priority **penalty**, but I think that's the logic.
On Sat Jan 25 04:28:32 2025 +0000, William Horvath wrote:
About flakiness: I could up the iteration count for yields (0 timeout waits) drastically without it increasing the total test time much. In practice, I've never seen this check fail on my fast machine under no load, and it's probably even less likely to fail on the comparatively sluggish test VM, but it wouldn't hurt. About the yield after the server call: we only yield if we timed out, which I guess is what's supposed to emulate the "if it's hit by an event then it gets a priority boost" behavior, as "hit by an event" means that we didn't get `STATUS_TIMEOUT`. It's a bit backwards in that we're instead giving the other events a priority **penalty**, but I think that's the logic.
Your patch doesn't change whether NtYieldExecution is called or not, and thus has no chance to affect any host "priority boost". I actually doubt that it works this way now, giving any additional "boost" after we already waited for the server call, but that is a different story unrelated to this patch. It's especially doubtful that NtYieldExecution is worth calling after a non-zero timeout; if the thread slept for some time it did perform the yield, no way around it — and in a much surer way than what NtYieldExecution does, which is de facto a no-op most often. But even with a zero timeout the server call did a blocking wait.
_NO_YIELD_PERFORMED only reflects reality when there were no blocking native system calls on the way through NtDelayExecution(). Basing this status on the sole return of NtYieldExecution in server_wait() on that path is plainly wrong; it doesn't take into account the yield performed during the server call. So returning _NO_YIELD_PERFORMED based on that additional NtYieldExecution call may only confuse the app, hinting it to wait additionally. As a bonus, ignoring that return from server_wait() reduces the functional part of the patch to 2-3 lines.
On Sat Jan 25 04:40:34 2025 +0000, Paul Gofman wrote:
Your patch doesn't change whether NtYieldExecution is called or not, and thus has no chance to affect any host "priority boost". I actually doubt that it works this way now, giving any additional "boost" after we already waited for the server call, but that is a different story unrelated to this patch. It's especially doubtful that NtYieldExecution is worth calling after a non-zero timeout; if the thread slept for some time it did perform the yield, no way around it — and in a much surer way than what NtYieldExecution does, which is de facto a no-op most often. But even with a zero timeout the server call did a blocking wait. _NO_YIELD_PERFORMED from NtYieldExecution() only reflects reality when there were no other blocking native system calls on the way. Basing this status on the sole return of NtYieldExecution in server_wait() on that path is plainly wrong; it doesn't take into account the yield performed during the server call. So returning _NO_YIELD_PERFORMED based on that additional NtYieldExecution call may only confuse the app, hinting it to wait additionally. As a bonus, ignoring that return from server_wait() reduces the functional part of the patch to 2-3 lines.
That's not quite what I meant, I know I'm not changing that behavior. I just meant that the alertable waits that **do** hit the yield (timed out) get an additional **penalty**, which consequently results in the waits that **don't** hit the yield having a slight advantage ("boost") over those ones. It's rudimentary and perhaps symbolic, given the weakness of `sched_yield`, but I think it makes sense when you compare it to Windows' behavior.
Before I made the Wine tests, I made a standalone test with 32000 alertable zero-timeout NtDelayExecution calls and ran them:
**Windows 10 22H2** VM (qemu):
Default timer resolution: ``` Total execution time: 156 ms
Yield Return Status Distribution: STATUS_SUCCESS: 19170 (59.9%) STATUS_TIMEOUT: 0 (0.0%) STATUS_NO_YIELD_PERFORMED: 12802 (40.0%) STATUS_USER_APC: 28 (0.1%) Other statuses: 0 (0.0%) ``` 1ms timer resolution: ``` Total execution time: 47 ms
Yield Return Status Distribution: STATUS_SUCCESS: 647 (2.0%) STATUS_TIMEOUT: 0 (0.0%) STATUS_NO_YIELD_PERFORMED: 31328 (97.9%) STATUS_USER_APC: 25 (0.1%) Other statuses: 0 (0.0%) ``` It seems that newer Windows NtDelayExecution return value heavily depends on the timer resolution, but with the default of 16ms, it's almost an even balance between the two on an idle system. Curiously, Windows 7 matches the 16ms resolution results of Windows 10, regardless of timer resolution.
Now, here's **Wine after this patch** (only 1ms matters on Wine, obviously): ``` Total execution time: 31 ms
Yield Return Status Distribution: STATUS_SUCCESS: 181 (0.6%) STATUS_TIMEOUT: 0 (0.0%) STATUS_NO_YIELD_PERFORMED: 31789 (99.3%) STATUS_USER_APC: 30 (0.1%) Other statuses: 0 (0.0%) ``` The ratio between SUCCESS/NO_YIELD_PERFORMED is eerily similar to Windows 10 with a timer interval of 1ms. That's why I think it's correct the way it is.
I think matching return status statistics by faking the status is, most of the time, the wrong metric. The status should reflect the real state of things: whether a yield was performed or not. Then, if anything cares about it at all, we probably shouldn't err on the side of saying that a yield wasn't performed; it may trigger unneeded waits.
On Sat Jan 25 05:37:01 2025 +0000, Paul Gofman wrote:
I think matching return status statistics by faking the status is, most of the time, the wrong metric. The status should reflect the real state of things: whether a yield was performed or not. Then, if anything cares about it at all, we probably shouldn't err on the side of saying that a yield wasn't performed; it may trigger unneeded waits.
I mean, before this, we were returning a completely unexpected result for `NtDelayExecution` (`STATUS_TIMEOUT`). If, by your suggestion, we instead always return `STATUS_SUCCESS` for alertable waits, that'd be almost the complete inverse of what Windows returns (for 1ms resolution). If an application happened to care about the return value of the delay, I feel like the current version is more likely to be correct than if we basically flipped the NO_YIELD/SUCCESS ratio from ~100%/~0% to 0%/100%.
On Sat Jan 25 05:37:01 2025 +0000, William Horvath wrote:
I mean, before this, we were returning a completely unexpected result for `NtDelayExecution` (`STATUS_TIMEOUT`). If, by your suggestion, we instead always return `STATUS_SUCCESS` for alertable waits, that'd be almost the complete inverse of what Windows returns (for 1ms resolution). If an application happened to care about the return value of the delay, I feel like the current version is more likely to be correct than if we basically flipped the NO_YIELD/SUCCESS ratio from ~100%/~0% to 0%/100%.
Why this statistics matters in practice? For non-alertable 0ms waits it won’t yield most of the time and return the corresponding status. For each 0ms alertable wait it *does yield 100% of the time*, do you have any practical reason to lie about that in return status, any app behaves better this way for some explained reason?
On Sat Jan 25 05:43:24 2025 +0000, Paul Gofman wrote:
Why this statistics matters in practice? For non-alertable 0ms waits it won’t yield most of the time and return the corresponding status. For each 0ms alertable wait it *does yield 100% of the time*, do you have any practical reason to lie about that in return status, any app behaves better this way for some explained reason?
I'm following you. Since we have no real control over the scheduler, I guess the second best thing we can do is to just report the fact that any call to `server_select` (from `server_wait`) is an implicit yield (i.e. `SUCCESS`/`USER_APC`). I don't think it's worth arguing over really, maybe your suggestion is safer.
I'll change it in accordance with your suggestion; nevertheless, I still think it should at least be a `todo_wine` for alertable zero-timeout delays to result in at least some `STATUS_NO_YIELD_PERFORMED`.