From: Giovanni Mascellani gmascellani@codeweavers.com
d3d12_command_queue_flush_ops() can re-enter itself while processing signal events. Since we don't use recursive mutexes, we currently have to check some of the queue variables without holding the mutex, which is not safe.
This is solved by allowing the queue to release its mutex while it is processing entries: when flushing, the queue is briefly locked, the is_flushing flag is set, the queue content is copied away and the queue is unlocked again. After having processed the entries, the queue is locked again to check if something else was added in the meantime. This is repeated until the queue is empty (or a wait operation is blocking it).
This should also remove some latency when a thread pushes to the queue while another one is processing it, but I didn't try to measure any impact. While it is expected that with this patch the queue mutex will be locked and unlocked more frequently, it should also remain locked for less time, hopefully creating little contention. --- libs/vkd3d/command.c | 136 ++++++++++++++++++++++++++----------- libs/vkd3d/vkd3d_private.h | 9 +++ 2 files changed, 106 insertions(+), 39 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 23353a6f..f83774cf 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -6017,6 +6017,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *i
vkd3d_mutex_destroy(&command_queue->op_mutex); vkd3d_free(command_queue->ops); + vkd3d_free(command_queue->aux_ops);
vkd3d_private_store_destroy(&command_queue->private_store);
@@ -6166,7 +6167,7 @@ static HRESULT d3d12_command_queue_enqueue_op(struct d3d12_command_queue *queue,
*new_op = *op;
- if (queue->ops_count == 1) + if (queue->ops_count == 1 && !queue->is_flushing) { if (!d3d12_command_queue_flush_ops_locked(queue, &flushed_any)) return d3d12_device_add_blocked_command_queues(queue->device, &queue, 1); @@ -6737,18 +6738,62 @@ static const struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = d3d12_command_queue_GetDesc, };
+static void d3d12_command_queue_swap_queues(struct d3d12_command_queue *queue) +{ + size_t size; + struct vkd3d_cs_op_data *ops; + + size = queue->ops_count; + queue->ops_count = queue->aux_ops_count; + queue->aux_ops_count = size; + + size = queue->ops_size; + queue->ops_size = queue->aux_ops_size; + queue->aux_ops_size = size; + + ops = queue->ops; + queue->ops = queue->aux_ops; + queue->aux_ops = ops; +} + +static void d3d12_command_queue_fixup_after_flush(struct d3d12_command_queue *queue, unsigned int done_count) +{ + queue->aux_ops_count -= done_count; + memmove(queue->aux_ops, &queue->aux_ops[done_count], queue->aux_ops_count * sizeof(*queue->ops)); + + vkd3d_mutex_lock(&queue->op_mutex); + + d3d12_command_queue_swap_queues(queue); + + if (!vkd3d_array_reserve((void **)&queue->ops, &queue->ops_size, + queue->ops_count + queue->aux_ops_count, sizeof(*queue->ops))) + { + ERR("Cannot reallocate, dropping %zu ops.\n", queue->aux_ops_count); + } + else + { + memcpy(&queue->ops[queue->ops_count], queue->aux_ops, queue->aux_ops_count * sizeof(*queue->ops)); + queue->ops_count += queue->aux_ops_count; + } + + queue->aux_ops_count = 0; + queue->is_flushing = false; + + vkd3d_mutex_unlock(&queue->op_mutex); +} + static bool d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any) { - if (!queue->ops_count) - return true; + vkd3d_mutex_lock(&queue->op_mutex);
/* This function may be re-entered during a call below to d3d12_command_queue_signal(). * We return true because the first caller is responsible for re-adding this queue to * the flush list if it ends up returning false. */ if (queue->is_flushing) + { + vkd3d_mutex_unlock(&queue->op_mutex); return true; - - vkd3d_mutex_lock(&queue->op_mutex); + }
return d3d12_command_queue_flush_ops_locked(queue, flushed_any); } @@ -6758,55 +6803,62 @@ static bool d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *que { struct vkd3d_cs_op_data *op; struct d3d12_fence *fence; - bool flushed_all = false; unsigned int i;
- /* Currently only required for d3d12_command_queue_signal(), but set it here anyway. */ queue->is_flushing = true;
- for (i = 0; i < queue->ops_count; ++i) + assert(queue->aux_ops_count == 0); + + while (queue->ops_count != 0) { - op = &queue->ops[i]; - switch (op->opcode) + d3d12_command_queue_swap_queues(queue); + + vkd3d_mutex_unlock(&queue->op_mutex); + + for (i = 0; i < queue->aux_ops_count; ++i) { - case VKD3D_CS_OP_WAIT: - fence = op->u.wait.fence; - vkd3d_mutex_lock(&fence->mutex); - if (op->u.wait.value > fence->max_pending_value) - { - vkd3d_mutex_unlock(&fence->mutex); - queue->ops_count -= i; - memmove(queue->ops, op, queue->ops_count * sizeof(*op)); - goto done; - } - d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); - d3d12_fence_decref(fence); - break; + op = &queue->aux_ops[i]; + switch (op->opcode) + { + case VKD3D_CS_OP_WAIT: + fence = op->u.wait.fence; + vkd3d_mutex_lock(&fence->mutex); + if (op->u.wait.value > fence->max_pending_value) + { + vkd3d_mutex_unlock(&fence->mutex); + d3d12_command_queue_fixup_after_flush(queue, i); + return false; + } + d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); + d3d12_fence_decref(fence); + break;
- case VKD3D_CS_OP_SIGNAL: - d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); - d3d12_fence_decref(op->u.signal.fence); - break; + case VKD3D_CS_OP_SIGNAL: + d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); + d3d12_fence_decref(op->u.signal.fence); + break;
- case VKD3D_CS_OP_EXECUTE: - d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); - break; + case VKD3D_CS_OP_EXECUTE: + d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); + break;
- default: - FIXME("Unhandled op type %u.\n", op->opcode); - break; + default: + vkd3d_unreachable(); + } + + *flushed_any |= true; } - *flushed_any |= true; - }
- queue->ops_count = 0; - flushed_all = true; + queue->aux_ops_count = 0; + + vkd3d_mutex_lock(&queue->op_mutex); + }
-done: queue->is_flushing = false;
vkd3d_mutex_unlock(&queue->op_mutex); - return flushed_all; + + return true; }
static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, @@ -6832,6 +6884,10 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, queue->ops_size = 0; queue->is_flushing = false;
+ queue->aux_ops = NULL; + queue->aux_ops_count = 0; + queue->aux_ops_size = 0; + if (desc->Priority == D3D12_COMMAND_QUEUE_PRIORITY_GLOBAL_REALTIME) { FIXME("Global realtime priority is not implemented.\n"); @@ -6897,6 +6953,8 @@ VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue)
if (d3d12_queue->ops_count) WARN("Acquired command queue %p with %zu remaining ops.\n", d3d12_queue, d3d12_queue->ops_count); + else if (d3d12_queue->is_flushing) + WARN("Acquired command queue %p which is flushing.\n", d3d12_queue);
return vk_queue; } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 56677648..f2b519fa 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1387,11 +1387,20 @@ struct d3d12_command_queue struct d3d12_device *device;
struct vkd3d_mutex op_mutex; + + /* These fields are protected by op_mutex. */ struct vkd3d_cs_op_data *ops; size_t ops_count; size_t ops_size; bool is_flushing;
+ /* These fields are not protected by op_mutex, but can only be + * used by the thread that set is_flushing; when is_flushing is + * not set, aux_ops_count must be zero. */ + struct vkd3d_cs_op_data *aux_ops; + size_t aux_ops_count; + size_t aux_ops_size; + struct vkd3d_private_store private_store; };