From: Jinoh Kang jinoh.kang.kr@gmail.com
--- dlls/kernel32/tests/virtual.c | 148 ++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+)
diff --git a/dlls/kernel32/tests/virtual.c b/dlls/kernel32/tests/virtual.c index 466badf0433..461e5bacfe0 100644 --- a/dlls/kernel32/tests/virtual.c +++ b/dlls/kernel32/tests/virtual.c @@ -4409,8 +4409,144 @@ static void test_ReadProcessMemory(void) free(buf); }
+struct sbtestshared +{ + /* Number of slots per thread */ + unsigned int num_slots; + + /* Number of generations (iterations per each slot) */ + unsigned int num_generations; + + /* Stores from threads */ + LONG *wrote; + + /* Observations from threads */ + LONG *read; + + /* Number of observed reorderings (witnesses) in the SB litmus test. + * Note: unobservable reorderings are not counted. */ + LONG *reorderings; +}; + +struct sbtestparams +{ + struct sbtestshared *shared; + unsigned int th_id; + void (WINAPI *barrier)(void); +}; + +static DWORD CALLBACK sbtest_thread_proc( void *arg ) +{ + const struct sbtestparams *params = arg; + const struct sbtestshared s = *params->shared; + const unsigned int th_id = params->th_id; + void (WINAPI *const barrier)(void) = params->barrier; + unsigned int slot = 0, gen = 0; + + for (;;) + { + LONG read_l, read_r; + + /* Access slots in reverse order to avoid prefetching */ + if (!slot--) + { + if (gen++ >= s.num_generations) break; + slot = s.num_slots - 1; + } + + WriteRelease(&s.wrote[th_id * s.num_slots + slot], gen); + (*barrier)(); + read_l = (gen << 1) | (ReadAcquire(&s.wrote[(th_id ^ 1) * s.num_slots + slot]) == gen); + + WriteRelease(&s.read[th_id * s.num_slots + slot], read_l); + + while (((read_r = ReadAcquire(&s.read[(th_id ^ 1) * s.num_slots + slot])) & ~1) != (gen << 1)) + YieldProcessor(); + + /* fairly distribute testing overhead across threads */ + if ((slot & 1) == th_id) + { + if (!(read_l & 1) && !(read_r & 1)) + InterlockedIncrement( s.reorderings ); + } + } + + return 0; +} + +#ifdef _MSC_VER + +#pragma intrinsic(_ReadWriteBarrier) +void _ReadWriteBarrier(void); + +static void WINAPI compiler_barrier(void) +{ +#pragma warning(suppress:4996) + _ReadWriteBarrier(); +} + +#else /* _MSC_VER */ + +static void WINAPI compiler_barrier(void) +{ +#if defined(__x86_64__) || defined(__i386__) + __asm__ __volatile__("" ::: "memory"); +#else + __atomic_thread_fence( __ATOMIC_RELAXED ); +#endif +} + +#endif /* _MSC_VER */ + +static LONG store_buffer_litmus_test( void (*WINAPI barrier0)(void), void (*WINAPI barrier1)(void) ) +{ + LONG reorderings = 0; + struct sbtestshared shared = { + /* Should be big enough to avoid false sharing */ + .num_slots = 64, + + /* Increase if flaky, decrease if slow */ + .num_generations = 32768, + + .reorderings = &reorderings, + }; + struct sbtestparams pars[2]; + HANDLE threads[2]; + unsigned int i; + DWORD ret; + + shared.wrote = VirtualAlloc( NULL, ARRAY_SIZE(threads) * shared.num_slots * sizeof(*shared.wrote), + MEM_COMMIT, PAGE_READWRITE ); + ok( shared.wrote != NULL, "VirtualAlloc failed: %lu\n", GetLastError() ); + + shared.read = VirtualAlloc( NULL, ARRAY_SIZE(threads) * shared.num_slots * sizeof(*shared.read), + MEM_COMMIT, PAGE_READWRITE ); + ok( shared.read != NULL, "VirtualAlloc failed: %lu\n", GetLastError() ); + + for (i = 0; i < ARRAY_SIZE(threads); i++) + { + pars[i].shared = &shared; + pars[i].th_id = i; + pars[i].barrier = i == 0 ? barrier0 : barrier1; + threads[i] = CreateThread( NULL, 0, sbtest_thread_proc, &pars[i], 0, NULL ); + ok( threads[i] != NULL, "CreateThread failed: %lu\n", GetLastError() ); + } + + ret = WaitForMultipleObjects( ARRAY_SIZE(threads), threads, TRUE, INFINITE ); + ok( ret == WAIT_OBJECT_0, "WaitForMultipleObjects failed: %lu\n", GetLastError() ); + + ret = VirtualFree( shared.read, 0, MEM_RELEASE ); + ok( ret, "VirtualFree failed: %lu\n", GetLastError() ); + + ret = VirtualFree( shared.wrote, 0, MEM_RELEASE ); + ok( ret, "VirtualFree failed: %lu\n", GetLastError() ); + + return reorderings; +} + static void test_FlushProcessWriteBuffers(void) { + LONG reorderings; unsigned int i;
if (!pFlushProcessWriteBuffers) @@ -4424,6 +4560,18 @@ static void test_FlushProcessWriteBuffers(void) { pFlushProcessWriteBuffers(); } + + if (si.dwNumberOfProcessors == 1) + { + skip( "single-processor system, cannot test store buffering behavior\n" ); + return; + } + + reorderings = store_buffer_litmus_test( compiler_barrier, compiler_barrier ); + ok( reorderings, "expected write-read reordering with compiler barrier only (got %ld reorderings)\n", reorderings ); + + reorderings = store_buffer_litmus_test( compiler_barrier, pFlushProcessWriteBuffers ); + ok( !reorderings, "expected sequential consistency with FlushProcessWriteBuffers (got %ld reorderings)\n", reorderings ); }
START_TEST(virtual)