This allows *BSDs to also have a fast path similar to futexes for thread-ID alerts. Also a kevent with `EV_CLEAR` and `NOTE_TRIGGER` maps perfectly to the thread alertable state, fixing the issue of the current mach-semaphore implementation not correctly waiting in the test case below: ``` NtAlertThreadByThreadId((HANDLE)GetCurrentThreadId()); NtAlertThreadByThreadId((HANDLE)GetCurrentThreadId()); NtWaitForAlertByThreadId(NULL, NULL); NtWaitForAlertByThreadId(NULL, NULL); ``` I took the liberty to remove this mach semaphore implementation, since all versions of OSX have supported kqueue().
-- v5: ntdll: Implement thread-ID alerts using ulock.
From: Marc-Aurel Zent marc_aurel@me.com
--- dlls/ntdll/unix/sync.c | 96 ++++++++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 31 deletions(-)
diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c index d1e4e5ee111..066717e800c 100644 --- a/dlls/ntdll/unix/sync.c +++ b/dlls/ntdll/unix/sync.c @@ -49,11 +49,11 @@ #include <stdlib.h> #include <time.h> #ifdef __APPLE__ -# include <mach/mach.h> -# include <mach/task.h> -# include <mach/semaphore.h> # include <mach/mach_time.h> #endif +#ifdef HAVE_KQUEUE +# include <sys/event.h> +#endif
#include "ntstatus.h" #define WIN32_NO_STATUS @@ -2354,8 +2354,8 @@ NTSTATUS WINAPI NtQueryInformationAtom( RTL_ATOM atom, ATOM_INFORMATION_CLASS cl
union tid_alert_entry { -#ifdef __APPLE__ - semaphore_t sem; +#ifdef HAVE_KQUEUE + int kq; #else HANDLE event; #ifdef __linux__ @@ -2396,15 +2396,35 @@ static union tid_alert_entry *get_tid_alert_entry( HANDLE tid )
entry = &tid_alert_blocks[block_idx][idx % TID_ALERT_BLOCK_SIZE];
-#ifdef __APPLE__ - if (!entry->sem) +#ifdef HAVE_KQUEUE + if (!entry->kq) { - semaphore_t sem; + int kq = kqueue(); + static const struct kevent init_event = + { + .ident = 1, + .filter = EVFILT_USER, + .flags = EV_ADD | EV_CLEAR, + .fflags = 0, + .data = 0, + .udata = NULL + }; + + if (kq == -1) + { + ERR("kqueue failed with error: %d (%s)\n", errno, strerror( errno )); + return NULL; + }
- if (semaphore_create( mach_task_self(), &sem, SYNC_POLICY_FIFO, 0 )) + if (kevent( kq, &init_event, 1, NULL, 0, NULL) == -1) + { + ERR("kevent creation failed with error: %d (%s)\n", errno, strerror( errno )); + close( kq ); return NULL; - if (InterlockedCompareExchange( (LONG *)&entry->sem, sem, 0 )) - semaphore_destroy( mach_task_self(), sem ); + } + + if (InterlockedCompareExchange( (LONG*)&entry->kq, kq, 0 )) + close( kq ); } #else #ifdef __linux__ @@ -2438,8 +2458,18 @@ NTSTATUS WINAPI NtAlertThreadByThreadId( HANDLE tid )
if (!entry) return STATUS_INVALID_CID;
-#ifdef __APPLE__ - semaphore_signal( entry->sem ); +#ifdef HAVE_KQUEUE + static const struct kevent signal_event = + { + .ident = 1, + .filter = EVFILT_USER, + .flags = 0, + .fflags = NOTE_TRIGGER, + .data = 0, + .udata = NULL + }; + + kevent( entry->kq, &signal_event, 1, NULL, 0, NULL ); return STATUS_SUCCESS; #else #ifdef __linux__ @@ -2457,7 +2487,7 @@ NTSTATUS WINAPI NtAlertThreadByThreadId( HANDLE tid ) }
-#if defined(__linux__) || defined(__APPLE__) +#if defined(__linux__) || defined(HAVE_KQUEUE) static LONGLONG get_absolute_timeout( const LARGE_INTEGER *timeout ) { LARGE_INTEGER now; @@ -2480,22 +2510,22 @@ static LONGLONG update_timeout( ULONGLONG end ) #endif
-#ifdef __APPLE__ +#ifdef HAVE_KQUEUE
/*********************************************************************** * NtWaitForAlertByThreadId (NTDLL.@) */ NTSTATUS WINAPI NtWaitForAlertByThreadId( const void *address, const LARGE_INTEGER *timeout ) { - union tid_alert_entry *entry = get_tid_alert_entry( NtCurrentTeb()->ClientId.UniqueThread ); - semaphore_t sem; + int ret; ULONGLONG end; - kern_return_t ret; + struct timespec timespec; + struct kevent wait_event; + union tid_alert_entry *entry = get_tid_alert_entry( NtCurrentTeb()->ClientId.UniqueThread );
TRACE( "%p %s\n", address, debugstr_timeout( timeout ) );
if (!entry) return STATUS_INVALID_CID; - sem = entry->sem;
if (timeout) { @@ -2505,27 +2535,31 @@ NTSTATUS WINAPI NtWaitForAlertByThreadId( const void *address, const LARGE_INTEG end = get_absolute_timeout( timeout ); }
- for (;;) + do { if (timeout) { LONGLONG timeleft = update_timeout( end ); - mach_timespec_t timespec;
timespec.tv_sec = timeleft / (ULONGLONG)TICKSPERSEC; timespec.tv_nsec = (timeleft % TICKSPERSEC) * 100; - ret = semaphore_timedwait( sem, timespec ); + if (timespec.tv_sec > 0x7FFFFFFF) timeout = NULL; } - else - ret = semaphore_wait( sem );
- switch (ret) - { - case KERN_SUCCESS: return STATUS_ALERTED; - case KERN_ABORTED: continue; - case KERN_OPERATION_TIMED_OUT: return STATUS_TIMEOUT; - default: return STATUS_INVALID_HANDLE; - } + ret = kevent( entry->kq, NULL, 0, &wait_event, 1, timeout ? ×pec : NULL ); + + } while (ret == -1 && errno == EINTR); + + switch (ret) + { + case 1: + return STATUS_ALERTED; + case 0: + return STATUS_TIMEOUT; + default: + assert( ret == -1 ); + ERR("kevent failed with error: %d (%s)\n", errno, strerror(errno)); + return STATUS_INVALID_HANDLE; } }
From: Marc-Aurel Zent marc_aurel@me.com
--- dlls/ntdll/unix/sync.c | 68 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 6 deletions(-)
diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c index 066717e800c..7333c58bae9 100644 --- a/dlls/ntdll/unix/sync.c +++ b/dlls/ntdll/unix/sync.c @@ -148,6 +148,13 @@ static inline int use_futexes(void)
#endif
+#ifdef __APPLE__ + +#define UL_COMPARE_AND_WAIT 1 +extern int __ulock_wait( uint32_t operation, void *addr, uint64_t value, uint32_t timeout ); +extern int __ulock_wake( uint32_t operation, void *addr, uint64_t wake_value ); + +#endif
/* create a struct security_descriptor and contained information in one contiguous piece of memory */ unsigned int alloc_object_attributes( const OBJECT_ATTRIBUTES *attr, struct object_attributes **ret, @@ -2354,14 +2361,17 @@ NTSTATUS WINAPI NtQueryInformationAtom( RTL_ATOM atom, ATOM_INFORMATION_CLASS cl
union tid_alert_entry { -#ifdef HAVE_KQUEUE +#ifdef __APPLE__ + LONG ulock; +#elif defined(HAVE_KQUEUE) int kq; #else HANDLE event; +#endif + #ifdef __linux__ LONG futex; #endif -#endif };
#define TID_ALERT_BLOCK_SIZE (65536 / sizeof(union tid_alert_entry)) @@ -2395,8 +2405,9 @@ static union tid_alert_entry *get_tid_alert_entry( HANDLE tid ) }
entry = &tid_alert_blocks[block_idx][idx % TID_ALERT_BLOCK_SIZE]; - -#ifdef HAVE_KQUEUE +#if defined(__APPLE__) + return entry; +#elif defined(HAVE_KQUEUE) if (!entry->kq) { int kq = kqueue(); @@ -2458,7 +2469,12 @@ NTSTATUS WINAPI NtAlertThreadByThreadId( HANDLE tid )
if (!entry) return STATUS_INVALID_CID;
-#ifdef HAVE_KQUEUE +#ifdef __APPLE__ + LONG *ulock = &entry->ulock; + if (!InterlockedExchange( ulock, 1 )) + __ulock_wake( UL_COMPARE_AND_WAIT, (void*)ulock, 0 ); + return STATUS_SUCCESS; +#elif defined(HAVE_KQUEUE) static const struct kevent signal_event = { .ident = 1, @@ -2509,8 +2525,48 @@ static LONGLONG update_timeout( ULONGLONG end ) } #endif
+#ifdef __APPLE__
-#ifdef HAVE_KQUEUE +/*********************************************************************** + * NtWaitForAlertByThreadId (NTDLL.@) + */ +NTSTATUS WINAPI NtWaitForAlertByThreadId( const void *address, const LARGE_INTEGER *timeout ) +{ + union tid_alert_entry *entry = get_tid_alert_entry( NtCurrentTeb()->ClientId.UniqueThread ); + NTSTATUS status; + LONG *ulock = &entry->ulock; + ULONGLONG end; + int ret; + + TRACE( "%p %s\n", address, debugstr_timeout( timeout ) ); + + if (!entry) return STATUS_INVALID_CID; + + if (timeout) + { + if (timeout->QuadPart == TIMEOUT_INFINITE) + timeout = NULL; + else + end = get_absolute_timeout( timeout ); + } + + while (!InterlockedExchange( ulock, 0 )) + { + if (timeout) + { + LONGLONG us_timeleft = update_timeout( end ) / 10; + if (!us_timeleft) return STATUS_TIMEOUT; + ret = __ulock_wait( UL_COMPARE_AND_WAIT, (void*)ulock, 0, us_timeleft ); + } + else + ret = __ulock_wait( UL_COMPARE_AND_WAIT, (void*)ulock, 0, 0); + + if (ret == -1 && errno == ETIMEDOUT) return STATUS_TIMEOUT; + } + return STATUS_ALERTED; +} + +#elif defined(HAVE_KQUEUE)
/*********************************************************************** * NtWaitForAlertByThreadId (NTDLL.@)
Since on Apple systems `ulock` also provides futex-like semantics and performs better than both `kqueue/kevent` and the existing mach semaphore implementation, I added it to this MR as well.
Hopefully this is alright, if it should be in another separate MR, just let me know!
Brendan Shanks (@bshanks) commented about dlls/ntdll/unix/sync.c:
if (!entry) return STATUS_INVALID_CID;
-#ifdef __APPLE__
- semaphore_signal( entry->sem );
+#ifdef HAVE_KQUEUE
- static const struct kevent signal_event =
This triggers a `mixing declarations and code` warning, I would put the whole `HAVE_KQUEUE` block inside braces.
Brendan Shanks (@bshanks) commented about dlls/ntdll/unix/sync.c:
timespec.tv_nsec = (timeleft % TICKSPERSEC) * 100;
ret = semaphore_timedwait( sem, timespec );
if (timespec.tv_sec > 0x7FFFFFFF) timeout = NULL; }
else
ret = semaphore_wait( sem );
switch (ret)
{
case KERN_SUCCESS: return STATUS_ALERTED;
case KERN_ABORTED: continue;
case KERN_OPERATION_TIMED_OUT: return STATUS_TIMEOUT;
default: return STATUS_INVALID_HANDLE;
}
ret = kevent( entry->kq, NULL, 0, &wait_event, 1, timeout ? ×pec : NULL );
Blank line doesn't seem necessary here
Brendan Shanks (@bshanks) commented about dlls/ntdll/unix/sync.c:
case KERN_ABORTED: continue;
case KERN_OPERATION_TIMED_OUT: return STATUS_TIMEOUT;
default: return STATUS_INVALID_HANDLE;
}
ret = kevent( entry->kq, NULL, 0, &wait_event, 1, timeout ? ×pec : NULL );
- } while (ret == -1 && errno == EINTR);
- switch (ret)
- {
- case 1:
return STATUS_ALERTED;
- case 0:
return STATUS_TIMEOUT;
- default:
assert( ret == -1 );
I don't think we want to `assert()` for this case.
Brendan Shanks (@bshanks) commented about dlls/ntdll/unix/sync.c:
case KERN_OPERATION_TIMED_OUT: return STATUS_TIMEOUT;
default: return STATUS_INVALID_HANDLE;
}
ret = kevent( entry->kq, NULL, 0, &wait_event, 1, timeout ? ×pec : NULL );
- } while (ret == -1 && errno == EINTR);
- switch (ret)
- {
- case 1:
return STATUS_ALERTED;
- case 0:
return STATUS_TIMEOUT;
- default:
assert( ret == -1 );
ERR("kevent failed with error: %d (%s)\n", errno, strerror(errno));
Please put spaces after/before the parentheses, just to be consistent with the `TRACE` above.
Brendan Shanks (@bshanks) commented about dlls/ntdll/unix/sync.c:
-#ifdef __APPLE__ +#ifdef HAVE_KQUEUE
/***********************************************************************
NtWaitForAlertByThreadId (NTDLL.@)
*/ NTSTATUS WINAPI NtWaitForAlertByThreadId( const void *address, const LARGE_INTEGER *timeout ) {
- union tid_alert_entry *entry = get_tid_alert_entry( NtCurrentTeb()->ClientId.UniqueThread );
- semaphore_t sem;
- int ret; ULONGLONG end;
- kern_return_t ret;
- struct timespec timespec;
- struct kevent wait_event;
- union tid_alert_entry *entry = get_tid_alert_entry( NtCurrentTeb()->ClientId.UniqueThread );
This line could stay above to reduce the diff.
Thanks for sending this, I added some comments but overall it looks good.
I think it would be better to put the `ulock` commit into a separate MR, at the very least that will need a version check or `dlopen()`/`dlsym()` to handle pre-10.12 macOS.