-- v8: rtworkq: Release cancelled work items.
From: Yuxuan Shui yshui@codeweavers.com
queue_release_pending_item releases the work_item reference but later accesses `item->queue`, which is a potential use-after-free. --- dlls/rtworkq/queue.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/dlls/rtworkq/queue.c b/dlls/rtworkq/queue.c index eebb096ad31..00b77bf6953 100644 --- a/dlls/rtworkq/queue.c +++ b/dlls/rtworkq/queue.c @@ -734,9 +734,10 @@ static HRESULT invoke_async_callback(IRtwqAsyncResult *result) * removed from pending items when it got canceled. */ static BOOL queue_release_pending_item(struct work_item *item) { + struct queue *queue = item->queue; BOOL ret = FALSE;
- EnterCriticalSection(&item->queue->cs); + EnterCriticalSection(&queue->cs); if (item->key) { list_remove(&item->entry); @@ -744,7 +745,7 @@ static BOOL queue_release_pending_item(struct work_item *item) item->key = 0; IUnknown_Release(&item->IUnknown_iface); } - LeaveCriticalSection(&item->queue->cs); + LeaveCriticalSection(&queue->cs); return ret; }
From: Yuxuan Shui yshui@codeweavers.com
--- dlls/mfplat/tests/mfplat.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/dlls/mfplat/tests/mfplat.c b/dlls/mfplat/tests/mfplat.c index 6897036e9dd..363c240d13d 100644 --- a/dlls/mfplat/tests/mfplat.c +++ b/dlls/mfplat/tests/mfplat.c @@ -3598,6 +3598,7 @@ static void test_scheduled_items(void) IMFAsyncResult *result; MFWORKITEM_KEY key, key2; HRESULT hr; + ULONG refcount;
callback = create_test_callback(NULL);
@@ -3610,6 +3611,9 @@ static void test_scheduled_items(void) hr = MFCancelWorkItem(key); ok(hr == S_OK, "Failed to cancel item, hr %#lx.\n", hr);
+ refcount = IMFAsyncCallback_Release(&callback->IMFAsyncCallback_iface); + todo_wine ok(refcount == 0, "Unexpected refcount %lu.\n", refcount); + hr = MFCancelWorkItem(key); ok(hr == MF_E_NOT_FOUND || broken(hr == S_OK) /* < win10 */, "Unexpected hr %#lx.\n", hr);
@@ -3619,6 +3623,8 @@ static void test_scheduled_items(void) return; }
+ callback = create_test_callback(NULL); + hr = MFCreateAsyncResult(NULL, &callback->IMFAsyncCallback_iface, NULL, &result); ok(hr == S_OK, "Failed to create result, hr %#lx.\n", hr);
From: Yuxuan Shui yshui@codeweavers.com
And avoiding race condition while doing so.
Usually the threadpool holds a reference to the `work_item`, which is released when the `work_item`'s callback is invoked. However `queue_cancel_item` closes the threadpool object without releasing it, leading to a leak. The fix is not as simple as adding a `IUnknown_Release` to `queue_cancel_item`, because the `work_item`'s callback can still be called after CloseThreadpoolTimer/Wait has returned. In fact its callback might currently be running. In which case the callback will access freed memory if `queue_cancel_item` frees the `work_item`.
We have to stop any further callbacks to be queued, wait for any currently running callbacks to finish, then finally we can release the `work_item` if it hasn't already been freed during the wait. --- dlls/mfplat/tests/mfplat.c | 2 +- dlls/rtworkq/queue.c | 53 +++++++++++++++++++++++++++++++------- 2 files changed, 45 insertions(+), 10 deletions(-)
diff --git a/dlls/mfplat/tests/mfplat.c b/dlls/mfplat/tests/mfplat.c index 363c240d13d..1e751b425fa 100644 --- a/dlls/mfplat/tests/mfplat.c +++ b/dlls/mfplat/tests/mfplat.c @@ -3612,7 +3612,7 @@ static void test_scheduled_items(void) ok(hr == S_OK, "Failed to cancel item, hr %#lx.\n", hr);
refcount = IMFAsyncCallback_Release(&callback->IMFAsyncCallback_iface); - todo_wine ok(refcount == 0, "Unexpected refcount %lu.\n", refcount); + ok(refcount == 0, "Unexpected refcount %lu.\n", refcount);
hr = MFCancelWorkItem(key); ok(hr == MF_E_NOT_FOUND || broken(hr == S_OK) /* < win10 */, "Unexpected hr %#lx.\n", hr); diff --git a/dlls/rtworkq/queue.c b/dlls/rtworkq/queue.c index 00b77bf6953..a774ee4d58e 100644 --- a/dlls/rtworkq/queue.c +++ b/dlls/rtworkq/queue.c @@ -768,8 +768,9 @@ static void CALLBACK waiting_item_cancelable_callback(TP_CALLBACK_INSTANCE *inst
TRACE("result object %p.\n", item->result);
- if (queue_release_pending_item(item)) - invoke_async_callback(item->result); + queue_release_pending_item(item); + + invoke_async_callback(item->result);
IUnknown_Release(&item->IUnknown_iface); } @@ -791,8 +792,9 @@ static void CALLBACK scheduled_item_cancelable_callback(TP_CALLBACK_INSTANCE *in
TRACE("result object %p.\n", item->result);
- if (queue_release_pending_item(item)) - invoke_async_callback(item->result); + queue_release_pending_item(item); + + invoke_async_callback(item->result);
IUnknown_Release(&item->IUnknown_iface); } @@ -884,6 +886,8 @@ static HRESULT queue_submit_timer(struct queue *queue, IRtwqAsyncResult *result, static HRESULT queue_cancel_item(struct queue *queue, RTWQWORKITEM_KEY key) { HRESULT hr = RTWQ_E_NOT_FOUND; + TP_WAIT *wait_object; + TP_TIMER *timer_object; struct work_item *item;
EnterCriticalSection(&queue->cs); @@ -891,22 +895,53 @@ static HRESULT queue_cancel_item(struct queue *queue, RTWQWORKITEM_KEY key) { if (item->key == key) { + /* We can't immediately release the item here, because the callback could already be + * running somewhere else. And if we release it here, the callback will access freed memory. + * So instead we have to make sure the callback is really stopped, or has really finished + * running before we do that. And we can't do that in this critical section, which would be a + * deadlock. So we first keep an extra reference to it, then leave the critical section to + * wait for the thread-pool objects, finally we re-enter critical section to release it. */ key >>= 32; + IUnknown_AddRef(&item->IUnknown_iface); if ((key & WAIT_ITEM_KEY_MASK) == WAIT_ITEM_KEY_MASK) { - IRtwqAsyncResult_SetStatus(item->result, RTWQ_E_OPERATION_CANCELLED); - invoke_async_callback(item->result); - CloseThreadpoolWait(item->u.wait_object); + wait_object = item->u.wait_object; item->u.wait_object = NULL; + LeaveCriticalSection(&queue->cs); + + SetThreadpoolWait(wait_object, NULL, NULL); + WaitForThreadpoolWaitCallbacks(wait_object, TRUE); + CloseThreadpoolWait(wait_object); + + EnterCriticalSection(&queue->cs); } else if ((key & SCHEDULED_ITEM_KEY_MASK) == SCHEDULED_ITEM_KEY_MASK) { - CloseThreadpoolTimer(item->u.timer_object); + timer_object = item->u.timer_object; item->u.timer_object = NULL; + LeaveCriticalSection(&queue->cs); + + SetThreadpoolTimer(timer_object, NULL, 0, 0); + WaitForThreadpoolTimerCallbacks(timer_object, TRUE); + CloseThreadpoolTimer(timer_object); + + EnterCriticalSection(&queue->cs); } else WARN("Unknown item key mask %#I64x.\n", key); - queue_release_pending_item(item); + + if (queue_release_pending_item(item)) + { + // This means the callback wasn't run during our wait, so we can invoke the + // callback with a canceled status, and release the work item. + if ((key & WAIT_ITEM_KEY_MASK) == WAIT_ITEM_KEY_MASK) + { + IRtwqAsyncResult_SetStatus(item->result, RTWQ_E_OPERATION_CANCELLED); + invoke_async_callback(item->result); + } + IUnknown_Release(&item->IUnknown_iface); + } + IUnknown_Release(&item->IUnknown_iface); hr = S_OK; break; }
On Wed Mar 6 06:20:55 2024 +0000, Yuxuan Shui wrote:
~~Or maybe we can put the whole callback into critical section~~ I don't think this would help.
OK, I restructured it, maybe it's better? :/
On Wed Mar 6 07:02:17 2024 +0000, Yuxuan Shui wrote:
OK, I restructured it, maybe it's better? :/
I think an alternative to what this MR is doing is adding a `cancelled` flag to `work_item`, in `queue_cancel_item` we set this flag and schedule the Tp object to run immediately, and let the callback release the work item.
Zhiyi Zhang (@zhiyi) commented about dlls/mfplat/tests/mfplat.c:
IMFAsyncResult *result; MFWORKITEM_KEY key, key2; HRESULT hr;
- ULONG refcount;
Please change the subject to "mfplat/tests: Validate MFCancelWorkItem releases the callback."
Zhiyi Zhang (@zhiyi) commented about dlls/rtworkq/queue.c:
item->u.timer_object = NULL;
LeaveCriticalSection(&queue->cs);
SetThreadpoolTimer(timer_object, NULL, 0, 0);
WaitForThreadpoolTimerCallbacks(timer_object, TRUE);
CloseThreadpoolTimer(timer_object);
EnterCriticalSection(&queue->cs); } else WARN("Unknown item key mask %#I64x.\n", key);
queue_release_pending_item(item);
if (queue_release_pending_item(item))
{
// This means the callback wasn't run during our wait, so we can invoke the
Please use /**/ for comments.
Zhiyi Zhang (@zhiyi) commented about dlls/mfplat/tests/mfplat.c:
ok(hr == S_OK, "Failed to cancel item, hr %#lx.\n", hr); refcount = IMFAsyncCallback_Release(&callback->IMFAsyncCallback_iface);
- todo_wine ok(refcount == 0, "Unexpected refcount %lu.\n", refcount);
- ok(refcount == 0, "Unexpected refcount %lu.\n", refcount);
After 7865026, won't the callback be eventually released in scheduled_item_cancelable_callback() or waiting_item_cancelable_callback()? How does the memory leak happen? If you're referring to the callback reference count should immediately be zero after calling MFCancelWorkItem(), I don't think that's that big of an issue. Is there an application that depends on the reference count being zero immediately after MFCancelWorkItem()?
Hmm, i think the clearer version does have a use-after-free problem. If the item is freed in `queue_cancel_item` while `scheduled_item_cancelable_callback`/`waiting_item_cancelable_callback` is running, the latter will use-after-free.
How can this use-after-free happen? If scheduled_item_cancelable_callback() is still running, queue_cancel_item() merely decreases the work item reference count and not actually releasing the object.
On Wed Mar 6 08:05:54 2024 +0000, Zhiyi Zhang wrote:
After 7865026, won't the callback be eventually released in scheduled_item_cancelable_callback() or waiting_item_cancelable_callback()? How does the memory leak happen? If you're referring to the callback reference count should immediately be zero after calling MFCancelWorkItem(), I don't think that's that big of an issue. Is there an application that depends on the reference count being zero immediately after MFCancelWorkItem()?
the callbacks aren't guaranteed to run. note that the callbacks decrement the refcount _twice_, once in `queue_release_pending_item`, once directly; whereas `queue_cancel_item` only does it once. so if the callbacks aren't run, the `work_item` is leaked.
On Wed Mar 6 08:08:13 2024 +0000, Zhiyi Zhang wrote:
Hmm, i think the clearer version does have a use-after-free problem.
If the item is freed in `queue_cancel_item` while `scheduled_item_cancelable_callback`/`waiting_item_cancelable_callback` is running, the latter will use-after-free. How can this use-after-free happen? If scheduled_item_cancelable_callback() is still running, queue_cancel_item() merely decreases the work item reference count and not actually releasing the object.
the use-after-free happens _if_ I add the `IUnknown_Release(&item->IUnknown_iface)`
On Wed Mar 6 08:50:07 2024 +0000, Yuxuan Shui wrote:
the callbacks aren't guaranteed to run. note that the callbacks decrement the refcount _twice_, once in `queue_release_pending_item`, once directly; whereas `queue_cancel_item` only does it once. so if the callbacks aren't run, the `work_item` is leaked.
The language used here ([CloseThreadpoolWait](https://learn.microsoft.com/en-us/windows/win32/api/threadpoolapiset/nf-thre...)) is a bit ambiguous:
The wait object is freed immediately if there are no outstanding callbacks; otherwise, the timer object is freed asynchronously after the outstanding callbacks complete.
It's unclear what "outstanding" means, so I used this test:
```cpp #include <windows.h> #include <stdio.h> #include <assert.h>
void CALLBACK MyWaitCallback( PTP_CALLBACK_INSTANCE Instance, PVOID Parameter, PTP_WAIT Wait, TP_WAIT_RESULT WaitResult ) { (void)Instance; (void)Parameter; (void)Wait; (void)WaitResult;
fprintf(stderr, "MyWaitCallback: wait is over.\n"); }
int main(void) { PTP_WAIT Wait = NULL; PTP_WAIT_CALLBACK waitcallback = MyWaitCallback; HANDLE hEvent = NULL;
hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); assert(hEvent);
Wait = CreateThreadpoolWait(waitcallback, NULL, NULL); assert(Wait);
SetThreadpoolWait(Wait, hEvent, NULL); CloseThreadpoolWait(Wait); Sleep(500);
SetEvent(hEvent); Sleep(5000); return 0; } ```
As you can see, the callback isn't guaranteed to run.