On Wed Jul 10 12:00:25 2024 +0000, Grigory Vasilyev wrote:
changed this line in [version 2 of the diff](/wine/wine/-/merge_requests/6031/diffs?diff_id=121544&start_sha=121c32a30b55d32d1d3eea5e340e76208d8311cc#7cb9ec68fa2d916cb30c713051e39c8ebdc3165a_70_51)
You are right FUTEX_WAKE makes the mutex noticeably slower.
t1 - is custom mutex
t2 - pthread mutex
With FUTEX_WAKE:
``` Time elapsed: t1=89.413000ms, t2=5.800000ms ```
Without:
``` Time elapsed: t1=3.665000ms, t2=5.786000ms ```
simple benchmark:
```C #include <stdatomic.h> #include <stdint.h> #include <unistd.h> #include <stdlib.h> #include <stdio.h> #include <pthread.h> #include <linux/futex.h> #include <sys/syscall.h> #include <omp.h>
#define WINE_MUTEX_TYPE _Atomic unsigned int #define WINE_MUTEX_INIT ATOMIC_VAR_INIT(0) #define WINE_MUTEX_LOCK(RESOURCE) do { \ unsigned int expected = 0; \ while(!atomic_compare_exchange_weak(RESOURCE, &expected, 1)) { \ syscall(SYS_futex, RESOURCE, FUTEX_WAIT, 1, NULL, NULL, 0); \ } \ } while(0) #define WINE_MUTEX_UNLOCK(RESOURCE) do { \ atomic_store(RESOURCE, 0); \ } while(0) #define COUNT 1000000
void test(){ size_t t1_sum, t1_part; size_t t2_sum, t2_part; clock_t t1_start, t1_stop; clock_t t2_start, t2_stop; double t1_elapsed, t2_elapsed;
WINE_MUTEX_TYPE m1 = WINE_MUTEX_INIT; pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;
t1_start = clock(); #pragma omp parallel private(t1_part) shared(t1_sum) {
t1_sum = 0; t1_part = 0;
#pragma omp for { for (size_t i = 0; i < COUNT; i++) { WINE_MUTEX_LOCK(&m1); t1_part = t1_part + i; WINE_MUTEX_UNLOCK(&m1); } } #pragma omp critical { t1_sum += t1_part; } } t1_stop = clock();
t2_start = clock(); #pragma omp parallel private(t2_part) shared(t2_sum) {
t2_sum = 0; t2_part = 0;
#pragma omp for { for (size_t i = 0; i < COUNT; i++) { pthread_mutex_lock(&m2); t2_part = t2_part + i; pthread_mutex_unlock(&m2); } } #pragma omp critical { t2_sum += t2_part; } } t2_stop = clock();
printf("t1=%zu td2=%zu\n", t1_sum, t2_sum); t1_elapsed = (double)(t1_stop - t1_start) * 1000.0 / CLOCKS_PER_SEC; t2_elapsed = (double)(t2_stop - t2_start) * 1000.0 / CLOCKS_PER_SEC; printf("Time elapsed: t1=%fms, t2=%fms\n", t1_elapsed, t2_elapsed); }
int main() { test(); return 0; } ```
```bash clang -O2 -std=gnu17 test_mutex.c -o test_mutex OMP_NUM_THREADS=8; export OMP_NUM_THREADS ./test_mutex ```