-- v4: rtworkq: Release cancelled work items. mfplat: tests: Validate MFCancelWorkItem releases the callback. rtworkq: Avoid use-after-free.
From: Yuxuan Shui yshui@codeweavers.com
queue_release_pending_item releases the work_item reference but later accesses `item->queue`, which is a potential use-after-free. --- dlls/rtworkq/queue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/dlls/rtworkq/queue.c b/dlls/rtworkq/queue.c index 15b8da47639..03a7c1ce8bb 100644 --- a/dlls/rtworkq/queue.c +++ b/dlls/rtworkq/queue.c @@ -732,14 +732,16 @@ static HRESULT invoke_async_callback(IRtwqAsyncResult *result)
static void queue_release_pending_item(struct work_item *item) { - EnterCriticalSection(&item->queue->cs); + struct queue *queue = item->queue; + EnterCriticalSection(&queue->cs); if (item->key) { list_remove(&item->entry); item->key = 0; IUnknown_Release(&item->IUnknown_iface); } - LeaveCriticalSection(&item->queue->cs); + LeaveCriticalSection(&queue->cs); + }
static void CALLBACK waiting_item_callback(TP_CALLBACK_INSTANCE *instance, void *context, TP_WAIT *wait,
From: Yuxuan Shui yshui@codeweavers.com
--- dlls/mfplat/tests/mfplat.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/dlls/mfplat/tests/mfplat.c b/dlls/mfplat/tests/mfplat.c index 5f9460655ec..7f0d721392d 100644 --- a/dlls/mfplat/tests/mfplat.c +++ b/dlls/mfplat/tests/mfplat.c @@ -3560,6 +3560,7 @@ static void test_scheduled_items(void) IMFAsyncResult *result; MFWORKITEM_KEY key, key2; HRESULT hr; + ULONG refcount;
callback = create_test_callback(NULL);
@@ -3572,6 +3573,9 @@ static void test_scheduled_items(void) hr = MFCancelWorkItem(key); ok(hr == S_OK, "Failed to cancel item, hr %#lx.\n", hr);
+ refcount = IMFAsyncCallback_Release(&callback->IMFAsyncCallback_iface); + ok(refcount == 0, "Unexpected refcount %lu.\n", refcount); + hr = MFCancelWorkItem(key); ok(hr == MF_E_NOT_FOUND || broken(hr == S_OK) /* < win10 */, "Unexpected hr %#lx.\n", hr);
@@ -3581,6 +3585,8 @@ static void test_scheduled_items(void) return; }
+ callback = create_test_callback(NULL); + hr = MFCreateAsyncResult(NULL, &callback->IMFAsyncCallback_iface, NULL, &result); ok(hr == S_OK, "Failed to create result, hr %#lx.\n", hr);
From: Yuxuan Shui yshui@codeweavers.com
And avoiding race condition while doing so.
Usually the threadpool holds a reference to the work_item, which is released when the work_item's callback is invoked. However queue_cancel_item closes the threadpool object without releasing it, leading to a leak. The fix is not as simple as adding a IUnknown_Release, because the work_item's callback can still be called after CloseThreadpoolTimer/Wait has returned. In fact its callback might currently be running. In which case we would double release the reference.
We have to stop any further callbacks to be queued, wait for any currently running callbacks to finish, then finally we can close the threadpool object. After that, if the callback wasn't called, we can safely release the work_item reference. --- dlls/rtworkq/queue.c | 73 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 9 deletions(-)
diff --git a/dlls/rtworkq/queue.c b/dlls/rtworkq/queue.c index 03a7c1ce8bb..620145f64ba 100644 --- a/dlls/rtworkq/queue.c +++ b/dlls/rtworkq/queue.c @@ -878,32 +878,87 @@ static HRESULT queue_submit_timer(struct queue *queue, IRtwqAsyncResult *result, return S_OK; }
-static HRESULT queue_cancel_item(struct queue *queue, RTWQWORKITEM_KEY key) +static HRESULT queue_cancel_item(struct queue *queue, const RTWQWORKITEM_KEY key) { HRESULT hr = RTWQ_E_NOT_FOUND; + union { TP_WAIT *wait_object; TP_TIMER *timer_object; } work_object; + enum work_item_type work_object_type; struct work_item *item; + const UINT64 mask = key >> 32;
EnterCriticalSection(&queue->cs); LIST_FOR_EACH_ENTRY(item, &queue->pending_items, struct work_item, entry) { if (item->key == key) { - key >>= 32; - if ((key & WAIT_ITEM_KEY_MASK) == WAIT_ITEM_KEY_MASK) + hr = S_OK; + if ((mask & WAIT_ITEM_KEY_MASK) == WAIT_ITEM_KEY_MASK) + { + if (item->type != WORK_ITEM_WAIT) + WARN("Item %p is not a wait item, but its key has wait item mask.\n", item); + + work_object_type = WORK_ITEM_WAIT; + work_object.wait_object = item->u.wait_object; + item->u.wait_object = NULL; + } + else if ((mask & SCHEDULED_ITEM_KEY_MASK) == SCHEDULED_ITEM_KEY_MASK) + { + if (item->type != WORK_ITEM_TIMER) + WARN("Item %p is not a timer item, but its key has timer item mask.\n", item); + + work_object_type = WORK_ITEM_TIMER; + work_object.timer_object = item->u.timer_object; + item->u.timer_object = NULL; + } + else + { + WARN("Unknown item key mask %#I64x.\n", mask); + queue_release_pending_item(item); + goto out; + } + break; + } + } + + if (FAILED(hr)) + goto out; + + LeaveCriticalSection(&queue->cs); + + // Safely either stop the thread pool object, or if the callback is already running, wait for it to finish. + // This way, we can safely release the reference to the work item. + if (work_object_type == WORK_ITEM_WAIT) + { + SetThreadpoolWait(work_object.wait_object, NULL, NULL); + WaitForThreadpoolWaitCallbacks(work_object.wait_object, TRUE); + CloseThreadpoolWait(work_object.wait_object); + } + else if (work_object_type == WORK_ITEM_TIMER) + { + SetThreadpoolTimer(work_object.timer_object, NULL, 0, 0); + WaitForThreadpoolTimerCallbacks(work_object.timer_object, TRUE); + CloseThreadpoolTimer(work_object.timer_object); + } + + // If the work item is still in pending items, its callback hasn't been invoked yet; + // we remove it. Otherwise its callback would have already released it. + EnterCriticalSection(&queue->cs); + LIST_FOR_EACH_ENTRY(item, &queue->pending_items, struct work_item, entry) + { + if (item->key == key) + { + if (work_object_type == WORK_ITEM_WAIT) { IRtwqAsyncResult_SetStatus(item->result, RTWQ_E_OPERATION_CANCELLED); invoke_async_callback(item->result); - CloseThreadpoolWait(item->u.wait_object); } - else if ((key & SCHEDULED_ITEM_KEY_MASK) == SCHEDULED_ITEM_KEY_MASK) - CloseThreadpoolTimer(item->u.timer_object); - else - WARN("Unknown item key mask %#I64x.\n", key); queue_release_pending_item(item); - hr = S_OK; + IUnknown_Release(&item->IUnknown_iface); break; } } + +out: LeaveCriticalSection(&queue->cs);
return hr;
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=139439
Your paranoid android.
=== debian11b (64 bit WoW report) ===
dinput: device8.c:2238: Test failed: 0x700: got key_state[0] 0
On Wed Nov 1 14:34:08 2023 +0000, Nikolay Sivov wrote:
Just add a temp "struct queue *", and keep the rest as is.
addressed
On Wed Nov 1 14:26:41 2023 +0000, Yuxuan Shui wrote:
to make sure i didn't accidentally change `key`
Sorry, I don't see how this is useful.
On Wed Nov 1 14:26:17 2023 +0000, Yuxuan Shui wrote:
the concern is that this is causing a memory leak.
It's working as it supposed to, as far as I can tell. But like I said, it's not a problem to check, if it's reliable.
On Wed Nov 1 14:46:17 2023 +0000, Yuxuan Shui wrote:
the only place the TP objects are freed is in this function or in `work_item_Release`. we set the pointers to the TP objects to `NULL` here while holding the queue lock (which means no other threads can remove the item from `queue->pending_items`, which in turn means the work item will have at least 1 reference), so `work_item_Release` won't release it. therefore we can safely release it here. i haven't tried to demonstrate this issue but it's clearly there
Is it possible to have some demo program that demonstrates this?
On Mon Nov 6 14:41:51 2023 +0000, Nikolay Sivov wrote:
It's working as it supposed to, as far as I can tell. But like I said, it's not a problem to check, if it's reliable.
i am not sure what you meant by that? it is supposed to leak?
On Mon Nov 6 14:41:01 2023 +0000, Nikolay Sivov wrote:
Sorry, I don't see how this is useful.
it's that so i don't make accidental mistakes? is there an argument that having `const` is bad?
On Mon Nov 6 14:42:44 2023 +0000, Nikolay Sivov wrote:
Is it possible to have some demo program that demonstrates this?
demonstrate the leak? or demonstrate the race condition?
in the first case, our unit test already does. in the second case, i don't see how that's useful? the race condition is pretty clearly there, simply based on the fact that the callback might be running when you try to cancel it.
On Mon Nov 6 14:43:31 2023 +0000, Yuxuan Shui wrote:
i am not sure what you meant by that? it is supposed to leak?
I see now why it's confusing. The correct way is to have every commit succeeding tests, not every MR. Right now with two first commits you'll get a test failure.