From: Giovanni Mascellani <gmascellani@codeweavers.com>
The goal is to simplify the CS queue handling: with this change operations are always started by d3d12_command_queue_flush_ops(), in order to make further refactoring easier.
Notice that while with this change executing an operation on an empty CS queue is a bit less efficient, it doesn't require more locking. On the other hand, this change paves the way for executing CS operations without holding the queue lock. --- libs/vkd3d/command.c | 136 ++++++++++++++++--------------------------- 1 file changed, 50 insertions(+), 86 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index c9d2f03d..dc2b184e 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -27,6 +27,7 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, uint64_t value); static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); +static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any);
HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) @@ -6142,12 +6143,34 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu vkd3d_free(buffers); }
+static HRESULT d3d12_command_queue_enqueue_op(struct d3d12_command_queue *queue, const struct vkd3d_cs_op_data *op) +{ + struct vkd3d_cs_op_data *new_op; + bool flushed_any = false; + + vkd3d_mutex_lock(&queue->op_mutex); + + if (!(new_op = d3d12_command_queue_require_space_locked(queue))) + { + vkd3d_mutex_unlock(&queue->op_mutex); + return E_OUTOFMEMORY; + } + + *new_op = *op; + + if (queue->ops_count == 1) + return d3d12_command_queue_flush_ops_locked(queue, &flushed_any); + + vkd3d_mutex_unlock(&queue->op_mutex); + return S_OK; +} + static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_command_list *cmd_list; - struct vkd3d_cs_op_data *op; + struct vkd3d_cs_op_data op; VkCommandBuffer *buffers; unsigned int i;
@@ -6178,26 +6201,12 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm buffers[i] = cmd_list->vk_command_buffer; }
- vkd3d_mutex_lock(&command_queue->op_mutex); + op.opcode = VKD3D_CS_OP_EXECUTE; + op.u.execute.buffers = buffers; + op.u.execute.buffer_count = command_list_count;
- if (!command_queue->ops_count) - { - d3d12_command_queue_execute(command_queue, buffers, command_list_count); - vkd3d_mutex_unlock(&command_queue->op_mutex); - return; - } - - if (!(op = d3d12_command_queue_require_space_locked(command_queue))) - { - ERR("Failed to add op.\n"); - return; - } - op->opcode = VKD3D_CS_OP_EXECUTE; - op->u.execute.buffers = buffers; - op->u.execute.buffer_count = command_list_count; - - vkd3d_mutex_unlock(&command_queue->op_mutex); - return; + if (FAILED(d3d12_command_queue_enqueue_op(command_queue, &op))) + ERR("Cannot enqueue in command queue %p.\n", command_queue); }
static void STDMETHODCALLTYPE d3d12_command_queue_SetMarker(ID3D12CommandQueue *iface, @@ -6254,33 +6263,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); - struct vkd3d_cs_op_data *op; - HRESULT hr = S_OK; + struct vkd3d_cs_op_data op;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
- vkd3d_mutex_lock(&command_queue->op_mutex); - - if (!command_queue->ops_count) - { - hr = d3d12_command_queue_signal(command_queue, fence, value); - goto done; - } - - if (!(op = d3d12_command_queue_require_space_locked(command_queue))) - { - hr = E_OUTOFMEMORY; - goto done; - } - op->opcode = VKD3D_CS_OP_SIGNAL; - op->u.signal.fence = fence; - op->u.signal.value = value; + op.opcode = VKD3D_CS_OP_SIGNAL; + op.u.signal.fence = fence; + op.u.signal.value = value;
d3d12_fence_incref(fence);
-done: - vkd3d_mutex_unlock(&command_queue->op_mutex); - return hr; + return d3d12_command_queue_enqueue_op(command_queue, &op); }
static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, @@ -6596,51 +6589,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); - struct vkd3d_cs_op_data *op; - HRESULT hr = S_OK; + struct vkd3d_cs_op_data op;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
- vkd3d_mutex_lock(&command_queue->op_mutex); - vkd3d_mutex_lock(&fence->mutex); - - if (!command_queue->ops_count && value <= fence->max_pending_value) - { - hr = d3d12_command_queue_wait_locked(command_queue, fence, value); - goto done; - } - - /* This is the critical part required to support out-of-order signal. - * Normally we would be able to submit waits and signals out of order, but - * we don't have virtualized queues in Vulkan, so we need to handle the case - * where multiple queues alias over the same physical queue, so effectively, - * we need to manage out-of-order submits ourselves. */ - - if (!(op = d3d12_command_queue_require_space_locked(command_queue))) - { - vkd3d_mutex_unlock(&fence->mutex); - hr = E_OUTOFMEMORY; - goto done; - } - op->opcode = VKD3D_CS_OP_WAIT; - op->u.wait.fence = fence; - op->u.wait.value = value; + op.opcode = VKD3D_CS_OP_WAIT; + op.u.wait.fence = fence; + op.u.wait.value = value;
d3d12_fence_incref(fence);
- /* Add the queue to the blocked list after writing the op to ensure the queue isn't - * removed again in another thread because it has no ops. */ - if (command_queue->ops_count == 1) - hr = d3d12_command_queue_record_as_blocked(command_queue); - - /* The fence must remain locked until the op is created and the queue is added to the blocked list, - * because if an unblocking d3d12_fence_Signal() call occurs on another thread before the above - * work is done, flushing will be delayed until the next signal, if one occurs at all. */ - vkd3d_mutex_unlock(&fence->mutex); - -done: - vkd3d_mutex_unlock(&command_queue->op_mutex); - return hr; + return d3d12_command_queue_enqueue_op(command_queue, &op); }
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface, @@ -6764,14 +6723,8 @@ static const struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = d3d12_command_queue_GetDesc, };
-/* flushed_any is initialised by the caller. */ static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any) { - struct vkd3d_cs_op_data *op; - struct d3d12_fence *fence; - HRESULT hr = S_OK; - unsigned int i; - if (!queue->ops_count) return S_OK;
@@ -6783,6 +6736,17 @@ static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue,
vkd3d_mutex_lock(&queue->op_mutex);
+ return d3d12_command_queue_flush_ops_locked(queue, flushed_any); +} + +/* flushed_any is initialised by the caller. */ +static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any) +{ + struct vkd3d_cs_op_data *op; + struct d3d12_fence *fence; + HRESULT hr = S_OK; + unsigned int i; + /* Currently only required for d3d12_command_queue_signal(), but set it here anyway. */ queue->is_flushing = true;