From: Paul Gofman <pgofman(a)codeweavers.com> --- dlls/ntdll/sync.c | 85 ++++++++++++++++- dlls/ntdll/tests/sync.c | 198 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 279 insertions(+), 4 deletions(-) diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c index f9deaf5ce3d..58d4bd9d708 100644 --- a/dlls/ntdll/sync.c +++ b/dlls/ntdll/sync.c @@ -1376,13 +1376,32 @@ void WINAPI RtlDumpResource(LPRTL_RWLOCK rwl) } +struct barrier_impl +{ + LONG spin_count; + LONG total_thread_count; + volatile LONG reached_thread_count; + volatile LONG waiting_thread_count; + volatile LONG wait_barrier_complete; +}; + +C_ASSERT( sizeof(struct barrier_impl) <= sizeof(RTL_BARRIER) ); + /*********************************************************************** * RtlInitBarrier (NTDLL.@) */ NTSTATUS WINAPI RtlInitBarrier( RTL_BARRIER *barrier, LONG thread_count, LONG spin_count ) { - FIXME( "barrier %p, thread_count %ld, spin_count %ld stub.\n", barrier, thread_count, spin_count ); + struct barrier_impl *b = (struct barrier_impl *)barrier; + TRACE( "barrier %p, thread_count %ld, spin_count %ld.\n", barrier, thread_count, spin_count ); + + if (!barrier) return STATUS_INVALID_PARAMETER; + b->total_thread_count = thread_count; + b->spin_count = spin_count; + b->reached_thread_count = 0; + b->waiting_thread_count = 0; + b->wait_barrier_complete = 0; return STATUS_SUCCESS; } @@ -1392,7 +1411,20 @@ NTSTATUS WINAPI RtlInitBarrier( RTL_BARRIER *barrier, LONG thread_count, LONG sp */ void WINAPI RtlDeleteBarrier( RTL_BARRIER *barrier ) { - FIXME( "barrier %p stub.\n", barrier ); + struct barrier_impl *b = (struct barrier_impl *)barrier; + LONG count; + + TRACE( "barrier %p.\n", barrier ); + + if (!barrier) return; + if (ReadAcquire( &b->reached_thread_count ) < b->total_thread_count && b->waiting_thread_count) + { + /* On Windows this case will make RtlDeleteBarrier and the threads joining after wait forever, + * unless the threads joining after will have SYNCHRONIZATION_BARRIER_FLAGS_NO_DELETE. */ + ERR( "called before the barrier wait is satisfied.\n" ); + } + while ((count = ReadAcquire( &b->waiting_thread_count ))) + RtlWaitOnAddress( (void *)&b->waiting_thread_count, &count, sizeof(b->waiting_thread_count), NULL ); } @@ -1401,7 +1433,52 @@ void WINAPI RtlDeleteBarrier( RTL_BARRIER *barrier ) */ BOOLEAN WINAPI RtlBarrier( RTL_BARRIER *barrier, ULONG flags ) { - FIXME( "barrier %p, flags %#lx stub.\n", barrier, flags ); + static unsigned int once; + static const LONG zero; + + struct barrier_impl *b = (struct barrier_impl *)barrier; + unsigned int spin_count, count; + BOOL ret = FALSE; + + TRACE( "barrier %p, flags %#lx.\n", barrier, flags ); + + if (flags & ~0x10000 && !once++) FIXME( "Unknown flags %#lx.\n", flags ); + if (!barrier) return FALSE; + + if (ReadAcquire( &b->reached_thread_count ) >= b->total_thread_count) return TRUE; - return TRUE; + /* Incrementing reached_thread_count may trigger RTL_BARRIER data desrtuction from another thread, + * so lock RtlDeleteBarrier with waiting_thread_count before that. */ + if (flags & 0x10000) InterlockedIncrement( &b->waiting_thread_count ); + if (InterlockedIncrement( &b->reached_thread_count ) == b->total_thread_count) + { + WriteRelease( &b->wait_barrier_complete, 1 ); + RtlWakeAddressAll( (const void *)&b->wait_barrier_complete ); + ret = TRUE; + goto done; + } + /* On Windows the long wait doesn't consume CPU with any spin count, so probably the spin count + * is limited or not used at all. */ + spin_count = min( 2000, (unsigned int)b->spin_count ); + count = 0; + while (ReadAcquire( &b->reached_thread_count ) < b->total_thread_count ) + { + if (count < spin_count) + { + ++count; + YieldProcessor(); + continue; + } + RtlWaitOnAddress( (void *)&b->wait_barrier_complete, &zero, sizeof(b->wait_barrier_complete), NULL ); + } + +done: + if (flags & 0x10000 && !InterlockedDecrement( &b->waiting_thread_count )) + { + /* Now RTL_BARRIER structure contents may become invalid. Signaling on its address should be fine, the worst + * (unlikely) case it will wake something unrelated on reused address but that should be a legitimate spurious + * wakeup case. */ + RtlWakeAddressAll( (const void *)&b->waiting_thread_count ); + } + return ret; } diff --git a/dlls/ntdll/tests/sync.c b/dlls/ntdll/tests/sync.c index 22aaed5405f..f584dd9539d 100644 --- a/dlls/ntdll/tests/sync.c +++ b/dlls/ntdll/tests/sync.c @@ -24,6 +24,7 @@ #define WIN32_NO_STATUS #include "windef.h" #include "winternl.h" +#include "setjmp.h" #include "wine/test.h" static NTSTATUS (WINAPI *pNtAlertThreadByThreadId)( HANDLE ); @@ -1138,11 +1139,75 @@ static void test_delayexecution(void) } } +static jmp_buf call_exception_jmpbuf; + +static LONG WINAPI call_exception_handler( EXCEPTION_POINTERS *eptr ) +{ + EXCEPTION_RECORD *rec = eptr->ExceptionRecord; + + longjmp(call_exception_jmpbuf, rec->ExceptionCode); + return EXCEPTION_CONTINUE_SEARCH; +} + +struct test_barrier_thread_param +{ + RTL_BARRIER *barrier; + ULONG flags; + LONG thread_count; + BOOL wait_skipped; + volatile LONG *count; + volatile LONG *true_ret_count; +}; + +static DWORD WINAPI test_barrier_thread(void *param) +{ + struct test_barrier_thread_param *p = param; + + InterlockedIncrement( p->count ); + if (pRtlBarrier( p->barrier, p->flags )) + InterlockedIncrement( p->true_ret_count ); + if (!p->wait_skipped) + ok( *p->count == p->thread_count, "got %ld.\n", *p->count ); + return 0; +} + +static DWORD WINAPI test_barrier_delete_thread(void *param) +{ + struct test_barrier_thread_param *p = param; + + pRtlDeleteBarrier( p->barrier ); + if (p->flags & 0x10000) + { + ok( *p->count == p->thread_count, "got %ld.\n", *p->count ); + } + else + { + /* No wait was performed. */ + ok( *p->count <= p->thread_count, "got %ld.\n", *p->count ); + } + + return 0; +} + static void test_barrier(void) { + static const ULONG rtl_barrier_flags[] = + { + 0, + 0x10000, + ~0u, + 1, + }; + struct test_barrier_thread_param p, p2; + volatile LONG count, true_ret_count; + HANDLE threads[8], delete_thread; + void *vectored_handler; + unsigned int i, test; RTL_BARRIER barrier; NTSTATUS status; + int exc_code; BOOLEAN bval; + DWORD ret; if (!pRtlInitBarrier) { @@ -1150,16 +1215,149 @@ static void test_barrier(void) return; } + + vectored_handler = AddVectoredExceptionHandler(TRUE, call_exception_handler); + status = 0; + if (!(exc_code = setjmp(call_exception_jmpbuf))) + status = pRtlInitBarrier( NULL, 1, -1 ); + if (exc_code == STATUS_ACCESS_VIOLATION) + { + /* Crashes before Win10 1607; the other behaviour details are also different. */ + win_skip( "Old synchronization barriers implementation, skipping tests.\n" ); + return; + } + ok( status == STATUS_INVALID_PARAMETER, "got %#lx.\n", status ); + RemoveVectoredExceptionHandler(vectored_handler); + + status = pRtlInitBarrier( &barrier, -2, -1 ); + ok( !status, "got %#lx.\n", status ); + + status = pRtlInitBarrier( &barrier, INT_MAX, -1 ); + ok( !status, "got %#lx.\n", status ); + + status = pRtlInitBarrier( &barrier, 0, -1 ); + ok( !status, "got %#lx.\n", status ); + + status = pRtlInitBarrier( &barrier, 1, -2 ); + ok( !status, "got %#lx.\n", status ); + + status = pRtlInitBarrier( &barrier, 1, INT_MAX ); + ok( !status, "got %#lx.\n", status ); + status = pRtlInitBarrier( &barrier, 1, -1 ); ok( !status, "got %#lx.\n", status ); bval = pRtlBarrier( &barrier, 0 ); ok( bval == 1, "got %#x.\n", bval ); + bval = pRtlBarrier( NULL, 0 ); + ok( !bval, "got %#x.\n", bval ); + bval = pRtlBarrier( &barrier, 0 ); ok( bval == 1, "got %#x.\n", bval ); + pRtlDeleteBarrier( NULL ); + pRtlDeleteBarrier( &barrier ); + + bval = pRtlBarrier( &barrier, 0 ); + ok( bval == 1, "got %#x.\n", bval ); + + /* Previously completed barrier. */ + p.barrier = &barrier; + p.flags = 0; + p.thread_count = ARRAY_SIZE(threads) + 1; + p.count = &count; + p.true_ret_count = &true_ret_count; + p.wait_skipped = TRUE; + count = 0; + true_ret_count = 0; + for (i = 0; i < ARRAY_SIZE(threads); ++i) + threads[i] = CreateThread( NULL, 0, test_barrier_thread, &p, 0, NULL ); + InterlockedIncrement( p.count ); + if (pRtlBarrier( p.barrier, p.flags )) + InterlockedIncrement( p.true_ret_count ); + for (i = 0; i < ARRAY_SIZE(threads); ++i) + { + WaitForSingleObject( threads[i], INFINITE ); + CloseHandle( threads[i] ); + } + ok( true_ret_count == p.thread_count, "got %ld.\n", true_ret_count ); + + /* Normal case. */ + status = pRtlInitBarrier( &barrier, p.thread_count, -1 ); + ok( !status, "got %#lx.\n", status ); + /* RtlDeleteBarrier doesn't seem to do anything unless there are threads already waiting on the barrier with + * flag 0x10000 (and then it will wait for those to finish). */ + pRtlDeleteBarrier( &barrier ); + p.flags = 0x10000; + p.wait_skipped = FALSE; + count = 0; + true_ret_count = 0; + for (i = 0; i < ARRAY_SIZE(threads); ++i) + threads[i] = CreateThread( NULL, 0, test_barrier_thread, &p, 0, NULL ); + InterlockedIncrement( p.count ); + if (pRtlBarrier( p.barrier, p.flags )) + InterlockedIncrement( p.true_ret_count ); + /* RtlDeleteBarrier (with 0x10000 flag passed to RtlBarrier) will wait for the waiters to be actually woken + * before returning. Without calling RtlDeleteBarrier or setting 0x10000 flag here the test will randomly + * fire an exception or hang (because RtlInitBarrier will break the not yet woken waiters wake up. */ + pRtlDeleteBarrier( &barrier ); + pRtlInitBarrier( &barrier, p.thread_count, -1 ); + /* p.count is incremented before barrier wait, check at once. */ + ok( *p.count == p.thread_count, "got %ld.\n", *p.count ); + for (i = 0; i < ARRAY_SIZE(threads); ++i) + { + WaitForSingleObject( threads[i], INFINITE ); + CloseHandle( threads[i] ); + } + /* Only check after all the threads are finished and thus guaranteed to exit RtlBarrier() and increment true_ret_count. */ + ok( true_ret_count == 1, "got %ld.\n", true_ret_count ); + + /* Test wait in RtlDeleteBarrier(). */ + for (test = 0; test < ARRAY_SIZE(rtl_barrier_flags); ++test) + { + winetest_push_context( "flags %#lx", rtl_barrier_flags[test] ); + p.thread_count = ARRAY_SIZE(threads); + status = pRtlInitBarrier( &barrier, p.thread_count, -1 ); + ok( !status, "got %#lx.\n", status ); + true_ret_count = 0; + p.wait_skipped = FALSE; + count = 0; + true_ret_count = 0; + p.flags = rtl_barrier_flags[test]; + threads[0] = CreateThread( NULL, 0, test_barrier_thread, &p, 0, NULL ); + /* Now try to make sure the thread has entered barrier wait before spawning test_barrier_delete_thread. */ + while (!ReadAcquire( p.count )) + Sleep(1); + Sleep(16); + + delete_thread = CreateThread( NULL, 0, test_barrier_delete_thread, &p, 0, NULL ); + ret = WaitForSingleObject( delete_thread, 100 ); + if (p.flags & 0x10000) + ok( ret == WAIT_TIMEOUT, "got %#lx.\n", ret ); + else + ok( !ret, "got %#lx.\n", ret ); + + /* If barrier waiters joined with flag 0x10000 after RtlDeleteBarrier started the wait, all the barrier + * waiting threads and RtlDeleteBarrier will hang forever on Windows for some reason. So create the rest of + * the waiters without the flag. */ + p2 = p; + p2.flags = 0; + for (i = 1; i < ARRAY_SIZE(threads); ++i) + threads[i] = CreateThread( NULL, 0, test_barrier_thread, &p2, 0, NULL ); + + WaitForSingleObject( delete_thread, INFINITE ); + CloseHandle( delete_thread ); + for (i = 0; i < ARRAY_SIZE(threads); ++i) + { + WaitForSingleObject( threads[i], INFINITE ); + CloseHandle( threads[i] ); + } + ok( *p.count == p.thread_count, "got %ld.\n", *p.count ); + ok( true_ret_count == 1, "got %ld.\n", true_ret_count ); + winetest_pop_context(); + } } START_TEST(sync) -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/9267