-- v6: vkd3d: Do not keep the CS queue locked while processing it.
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d/command.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 158d0d9f..2d3007cf 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -670,8 +670,9 @@ static HRESULT d3d12_device_flush_blocked_queues_once(struct d3d12_device *devic
vkd3d_mutex_lock(&device->blocked_queues_mutex);
- /* Flush any ops unblocked by a new pending value. These cannot be flushed - * with the device locked, so move the queue pointers to a local array. */ + /* Flush any ops unblocked by a new pending value. These cannot be + * flushed while holding blocked_queue_mutex, so move the queue + * pointers to a local array. */ blocked_queue_count = device->blocked_queue_count; memcpy(blocked_queues, device->blocked_queues, blocked_queue_count * sizeof(blocked_queues[0])); device->blocked_queue_count = 0;
From: Giovanni Mascellani gmascellani@codeweavers.com
Otherwise it could be added more than once.
Note that the deleted comment is wrong: between when d3d12_command_queue_flush_ops() returns and when the queue is added back to the blocked list, the queue might have been pushed to and flushed an arbitrary number of times. --- libs/vkd3d/command.c | 61 +++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 35 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 2d3007cf..c9d2f03d 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -26,7 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, uint64_t value); -static bool d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); +static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any);
HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) @@ -636,26 +636,17 @@ static HRESULT d3d12_fence_update_pending_value(struct d3d12_fence *fence) return S_OK; }
-static HRESULT d3d12_device_add_blocked_command_queues(struct d3d12_device *device, - struct d3d12_command_queue * const *command_queues, unsigned int count) +static HRESULT d3d12_command_queue_record_as_blocked(struct d3d12_command_queue *command_queue) { + struct d3d12_device *device = command_queue->device; HRESULT hr = S_OK; - unsigned int i; - - if (count == 0) - return S_OK;
vkd3d_mutex_lock(&device->blocked_queues_mutex);
- if ((i = ARRAY_SIZE(device->blocked_queues) - device->blocked_queue_count) < count) - { - FIXME("Failed to add %u blocked command queue(s) to device %p.\n", count - i, device); - count = i; - hr = E_FAIL; - } - - for (i = 0; i < count; ++i) - device->blocked_queues[device->blocked_queue_count++] = command_queues[i]; + if (device->blocked_queue_count < ARRAY_SIZE(device->blocked_queues)) + device->blocked_queues[device->blocked_queue_count++] = command_queue; + else + WARN("Failed to add blocked command queue %p to device %p.\n", command_queue, device);
vkd3d_mutex_unlock(&device->blocked_queues_mutex); return hr; @@ -665,6 +656,7 @@ static HRESULT d3d12_device_flush_blocked_queues_once(struct d3d12_device *devic { struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES]; unsigned int i, blocked_queue_count; + HRESULT hr = S_OK;
*flushed_any = false;
@@ -679,18 +671,17 @@ static HRESULT d3d12_device_flush_blocked_queues_once(struct d3d12_device *devic
vkd3d_mutex_unlock(&device->blocked_queues_mutex);
- i = 0; - while (i < blocked_queue_count) + for (i = 0; i < blocked_queue_count; ++i) { - if (d3d12_command_queue_flush_ops(blocked_queues[i], flushed_any)) - blocked_queues[i] = blocked_queues[--blocked_queue_count]; - else - ++i; + HRESULT new_hr; + + new_hr = d3d12_command_queue_flush_ops(blocked_queues[i], flushed_any); + + if (SUCCEEDED(hr)) + hr = new_hr; }
- /* None of these queues could have been re-added during the above loop because - * blocked queues always have a nonzero op count. */ - return d3d12_device_add_blocked_command_queues(device, blocked_queues, blocked_queue_count); + return hr; }
static HRESULT d3d12_device_flush_blocked_queues(struct d3d12_device *device) @@ -6640,7 +6631,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if /* Add the queue to the blocked list after writing the op to ensure the queue isn't * removed again in another thread because it has no ops. */ if (command_queue->ops_count == 1) - hr = d3d12_device_add_blocked_command_queues(command_queue->device, &command_queue, 1); + hr = d3d12_command_queue_record_as_blocked(command_queue);
/* The fence must remain locked until the op is created and the queue is added to the blocked list, * because if an unblocking d3d12_fence_Signal() call occurs on another thread before the above @@ -6774,21 +6765,21 @@ static const struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = };
/* flushed_any is initialised by the caller. */ -static bool d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any) +static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any) { struct vkd3d_cs_op_data *op; struct d3d12_fence *fence; - bool flushed_all = false; + HRESULT hr = S_OK; unsigned int i;
if (!queue->ops_count) - return true; + return S_OK;
- /* This function may be re-entered during a call below to d3d12_command_queue_signal(). - * We return true because the first caller is responsible for re-adding this queue to - * the flush list if it ends up returning false. */ + /* This function may be re-entered when invoking + * d3d12_command_queue_signal(). The first call is responsible + * for re-adding the queue to the flush list. */ if (queue->is_flushing) - return true; + return S_OK;
vkd3d_mutex_lock(&queue->op_mutex);
@@ -6808,6 +6799,7 @@ static bool d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, boo vkd3d_mutex_unlock(&fence->mutex); queue->ops_count -= i; memmove(queue->ops, op, queue->ops_count * sizeof(*op)); + hr = d3d12_command_queue_record_as_blocked(queue); goto done; } d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); @@ -6831,13 +6823,12 @@ static bool d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, boo }
queue->ops_count = 0; - flushed_all = true;
done: queue->is_flushing = false;
vkd3d_mutex_unlock(&queue->op_mutex); - return flushed_all; + return hr; }
static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue,
From: Giovanni Mascellani gmascellani@codeweavers.com
The goal is to simplify the CS queue handling: with this change operations are always started by d3d12_command_queue_flush_ops(), in order to make further refactoring easier.
Notice that while with this change executing an operation on an empty CS queue is a bit less efficient, it doesn't require more locking. On the other hand, this change paves the road for executing CS operations without holding the queue lock. --- libs/vkd3d/command.c | 136 ++++++++++++++++--------------------------- 1 file changed, 50 insertions(+), 86 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index c9d2f03d..dc2b184e 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -27,6 +27,7 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, uint64_t value); static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); +static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any);
HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) @@ -6142,12 +6143,34 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu vkd3d_free(buffers); }
+static HRESULT d3d12_command_queue_enqueue_op(struct d3d12_command_queue *queue, const struct vkd3d_cs_op_data *op) +{ + struct vkd3d_cs_op_data *new_op; + bool flushed_any = false; + + vkd3d_mutex_lock(&queue->op_mutex); + + if (!(new_op = d3d12_command_queue_require_space_locked(queue))) + { + vkd3d_mutex_unlock(&queue->op_mutex); + return E_OUTOFMEMORY; + } + + *new_op = *op; + + if (queue->ops_count == 1) + return d3d12_command_queue_flush_ops_locked(queue, &flushed_any); + + vkd3d_mutex_unlock(&queue->op_mutex); + return S_OK; +} + static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_command_list *cmd_list; - struct vkd3d_cs_op_data *op; + struct vkd3d_cs_op_data op; VkCommandBuffer *buffers; unsigned int i;
@@ -6178,26 +6201,12 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm buffers[i] = cmd_list->vk_command_buffer; }
- vkd3d_mutex_lock(&command_queue->op_mutex); + op.opcode = VKD3D_CS_OP_EXECUTE; + op.u.execute.buffers = buffers; + op.u.execute.buffer_count = command_list_count;
- if (!command_queue->ops_count) - { - d3d12_command_queue_execute(command_queue, buffers, command_list_count); - vkd3d_mutex_unlock(&command_queue->op_mutex); - return; - } - - if (!(op = d3d12_command_queue_require_space_locked(command_queue))) - { - ERR("Failed to add op.\n"); - return; - } - op->opcode = VKD3D_CS_OP_EXECUTE; - op->u.execute.buffers = buffers; - op->u.execute.buffer_count = command_list_count; - - vkd3d_mutex_unlock(&command_queue->op_mutex); - return; + if (FAILED(d3d12_command_queue_enqueue_op(command_queue, &op))) + ERR("Cannot enqueue in command queue %p.\n", command_queue); }
static void STDMETHODCALLTYPE d3d12_command_queue_SetMarker(ID3D12CommandQueue *iface, @@ -6254,33 +6263,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); - struct vkd3d_cs_op_data *op; - HRESULT hr = S_OK; + struct vkd3d_cs_op_data op;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
- vkd3d_mutex_lock(&command_queue->op_mutex); - - if (!command_queue->ops_count) - { - hr = d3d12_command_queue_signal(command_queue, fence, value); - goto done; - } - - if (!(op = d3d12_command_queue_require_space_locked(command_queue))) - { - hr = E_OUTOFMEMORY; - goto done; - } - op->opcode = VKD3D_CS_OP_SIGNAL; - op->u.signal.fence = fence; - op->u.signal.value = value; + op.opcode = VKD3D_CS_OP_SIGNAL; + op.u.signal.fence = fence; + op.u.signal.value = value;
d3d12_fence_incref(fence);
-done: - vkd3d_mutex_unlock(&command_queue->op_mutex); - return hr; + return d3d12_command_queue_enqueue_op(command_queue, &op); }
static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, @@ -6596,51 +6589,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); - struct vkd3d_cs_op_data *op; - HRESULT hr = S_OK; + struct vkd3d_cs_op_data op;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
- vkd3d_mutex_lock(&command_queue->op_mutex); - vkd3d_mutex_lock(&fence->mutex); - - if (!command_queue->ops_count && value <= fence->max_pending_value) - { - hr = d3d12_command_queue_wait_locked(command_queue, fence, value); - goto done; - } - - /* This is the critical part required to support out-of-order signal. - * Normally we would be able to submit waits and signals out of order, but - * we don't have virtualized queues in Vulkan, so we need to handle the case - * where multiple queues alias over the same physical queue, so effectively, - * we need to manage out-of-order submits ourselves. */ - - if (!(op = d3d12_command_queue_require_space_locked(command_queue))) - { - vkd3d_mutex_unlock(&fence->mutex); - hr = E_OUTOFMEMORY; - goto done; - } - op->opcode = VKD3D_CS_OP_WAIT; - op->u.wait.fence = fence; - op->u.wait.value = value; + op.opcode = VKD3D_CS_OP_WAIT; + op.u.wait.fence = fence; + op.u.wait.value = value;
d3d12_fence_incref(fence);
- /* Add the queue to the blocked list after writing the op to ensure the queue isn't - * removed again in another thread because it has no ops. */ - if (command_queue->ops_count == 1) - hr = d3d12_command_queue_record_as_blocked(command_queue); - - /* The fence must remain locked until the op is created and the queue is added to the blocked list, - * because if an unblocking d3d12_fence_Signal() call occurs on another thread before the above - * work is done, flushing will be delayed until the next signal, if one occurs at all. */ - vkd3d_mutex_unlock(&fence->mutex); - -done: - vkd3d_mutex_unlock(&command_queue->op_mutex); - return hr; + return d3d12_command_queue_enqueue_op(command_queue, &op); }
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface, @@ -6764,14 +6723,8 @@ static const struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = d3d12_command_queue_GetDesc, };
-/* flushed_any is initialised by the caller. */ static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any) { - struct vkd3d_cs_op_data *op; - struct d3d12_fence *fence; - HRESULT hr = S_OK; - unsigned int i; - if (!queue->ops_count) return S_OK;
@@ -6783,6 +6736,17 @@ static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue,
vkd3d_mutex_lock(&queue->op_mutex);
+ return d3d12_command_queue_flush_ops_locked(queue, flushed_any); +} + +/* flushed_any is initialised by the caller. */ +static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any) +{ + struct vkd3d_cs_op_data *op; + struct d3d12_fence *fence; + HRESULT hr = S_OK; + unsigned int i; + /* Currently only required for d3d12_command_queue_signal(), but set it here anyway. */ queue->is_flushing = true;
From: Giovanni Mascellani gmascellani@codeweavers.com
d3d12_command_queue_flush_ops() can re-enter itself while processing signal events. Since we don't use recursive mutexes, we currently have to check some of the queue variables without holding the mutex, which is not safe.
This is solved by allowing the queue to release its mutex while it is processing entries: when flushing, the queue is briefly locked, the is_flushing flag is set, the queue content is copied away and the queue is unlocked again. After having processed the entries, the queue is locked again to check if something else was added in the meantime. This is repeated until the queue is empty (or a wait operation is blocking it).
This should also remove some latency when a thread pushes to the queue while another one is processing it, but I didn't try to measure any impact. While it is expected that with this patch the queue mutex will be locked and unlocked more frequently, it should also remain locked for less time, hopefully creating little contention. --- libs/vkd3d/command.c | 172 +++++++++++++++++++++++++------------ libs/vkd3d/vkd3d_private.h | 18 +++- 2 files changed, 130 insertions(+), 60 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index dc2b184e..0e444c42 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -5994,6 +5994,11 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if return refcount; }
+static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) +{ + vkd3d_free(array->ops); +} + static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); @@ -6008,7 +6013,8 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *i vkd3d_fence_worker_stop(&command_queue->fence_worker, device);
vkd3d_mutex_destroy(&command_queue->op_mutex); - vkd3d_free(command_queue->ops); + d3d12_command_queue_op_array_destroy(&command_queue->op_queue); + d3d12_command_queue_op_array_destroy(&command_queue->aux_op_queue);
vkd3d_private_store_destroy(&command_queue->private_store);
@@ -6079,14 +6085,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetDevice(ID3D12CommandQueu return d3d12_device_query_interface(command_queue->device, iid, device); }
-static struct vkd3d_cs_op_data *d3d12_command_queue_require_space_locked(struct d3d12_command_queue *queue) -{ - if (!vkd3d_array_reserve((void **)&queue->ops, &queue->ops_size, queue->ops_count + 1, sizeof(*queue->ops))) - return NULL; - - return &queue->ops[queue->ops_count++]; -} - static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *resource, UINT region_count, const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, @@ -6143,22 +6141,34 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu vkd3d_free(buffers); }
+static bool d3d12_command_queue_op_array_append(struct d3d12_command_queue_op_array *array, + size_t count, const struct vkd3d_cs_op_data *new_ops) +{ + if (!vkd3d_array_reserve((void **)&array->ops, &array->size, array->count + count, sizeof(*array->ops))) + { + ERR("Cannot reserve memory for %zu new ops.\n", count); + return false; + } + + memcpy(&array->ops[array->count], new_ops, count * sizeof(*array->ops)); + array->count += count; + + return true; +} + static HRESULT d3d12_command_queue_enqueue_op(struct d3d12_command_queue *queue, const struct vkd3d_cs_op_data *op) { - struct vkd3d_cs_op_data *new_op; bool flushed_any = false;
vkd3d_mutex_lock(&queue->op_mutex);
- if (!(new_op = d3d12_command_queue_require_space_locked(queue))) + if (!d3d12_command_queue_op_array_append(&queue->op_queue, 1, op)) { vkd3d_mutex_unlock(&queue->op_mutex); return E_OUTOFMEMORY; }
- *new_op = *op; - - if (queue->ops_count == 1) + if (queue->op_queue.count == 1 && !queue->is_flushing) return d3d12_command_queue_flush_ops_locked(queue, &flushed_any);
vkd3d_mutex_unlock(&queue->op_mutex); @@ -6723,18 +6733,51 @@ static const struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = d3d12_command_queue_GetDesc, };
+static void d3d12_command_queue_swap_queues(struct d3d12_command_queue *queue) +{ + struct d3d12_command_queue_op_array array; + + array = queue->op_queue; + queue->op_queue = queue->aux_op_queue; + queue->aux_op_queue = array; +} + +static HRESULT d3d12_command_queue_fixup_after_flush(struct d3d12_command_queue *queue, unsigned int done_count) +{ + HRESULT hr; + + queue->aux_op_queue.count -= done_count; + memmove(queue->aux_op_queue.ops, &queue->aux_op_queue.ops[done_count], + queue->aux_op_queue.count * sizeof(*queue->aux_op_queue.ops)); + + vkd3d_mutex_lock(&queue->op_mutex); + + d3d12_command_queue_swap_queues(queue); + + d3d12_command_queue_op_array_append(&queue->op_queue, queue->aux_op_queue.count, queue->aux_op_queue.ops); + + queue->aux_op_queue.count = 0; + queue->is_flushing = false; + + hr = d3d12_command_queue_record_as_blocked(queue); + + vkd3d_mutex_unlock(&queue->op_mutex); + + return hr; +} + static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any) { - if (!queue->ops_count) - return S_OK; + vkd3d_mutex_lock(&queue->op_mutex);
/* This function may be re-entered when invoking * d3d12_command_queue_signal(). The first call is responsible * for re-adding the queue to the flush list. */ if (queue->is_flushing) + { + vkd3d_mutex_unlock(&queue->op_mutex); return S_OK; - - vkd3d_mutex_lock(&queue->op_mutex); + }
return d3d12_command_queue_flush_ops_locked(queue, flushed_any); } @@ -6744,55 +6787,68 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * { struct vkd3d_cs_op_data *op; struct d3d12_fence *fence; - HRESULT hr = S_OK; unsigned int i;
- /* Currently only required for d3d12_command_queue_signal(), but set it here anyway. */ queue->is_flushing = true;
- for (i = 0; i < queue->ops_count; ++i) + assert(queue->aux_op_queue.count == 0); + + while (queue->op_queue.count != 0) { - op = &queue->ops[i]; - switch (op->opcode) + d3d12_command_queue_swap_queues(queue); + + vkd3d_mutex_unlock(&queue->op_mutex); + + for (i = 0; i < queue->aux_op_queue.count; ++i) { - case VKD3D_CS_OP_WAIT: - fence = op->u.wait.fence; - vkd3d_mutex_lock(&fence->mutex); - if (op->u.wait.value > fence->max_pending_value) - { - vkd3d_mutex_unlock(&fence->mutex); - queue->ops_count -= i; - memmove(queue->ops, op, queue->ops_count * sizeof(*op)); - hr = d3d12_command_queue_record_as_blocked(queue); - goto done; - } - d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); - d3d12_fence_decref(fence); - break; + op = &queue->aux_op_queue.ops[i]; + switch (op->opcode) + { + case VKD3D_CS_OP_WAIT: + fence = op->u.wait.fence; + vkd3d_mutex_lock(&fence->mutex); + if (op->u.wait.value > fence->max_pending_value) + { + vkd3d_mutex_unlock(&fence->mutex); + return d3d12_command_queue_fixup_after_flush(queue, i); + } + d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); + d3d12_fence_decref(fence); + break;
- case VKD3D_CS_OP_SIGNAL: - d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); - d3d12_fence_decref(op->u.signal.fence); - break; + case VKD3D_CS_OP_SIGNAL: + d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); + d3d12_fence_decref(op->u.signal.fence); + break;
- case VKD3D_CS_OP_EXECUTE: - d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); - break; + case VKD3D_CS_OP_EXECUTE: + d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); + break;
- default: - FIXME("Unhandled op type %u.\n", op->opcode); - break; + default: + vkd3d_unreachable(); + } + + *flushed_any |= true; } - *flushed_any |= true; - }
- queue->ops_count = 0; + queue->aux_op_queue.count = 0; + + vkd3d_mutex_lock(&queue->op_mutex); + }
-done: queue->is_flushing = false;
vkd3d_mutex_unlock(&queue->op_mutex); - return hr; + + return S_OK; +} + +static void d3d12_command_queue_op_array_init(struct d3d12_command_queue_op_array *array) +{ + array->ops = NULL; + array->count = 0; + array->size = 0; }
static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, @@ -6813,11 +6869,11 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, queue->last_waited_fence = NULL; queue->last_waited_fence_value = 0;
- queue->ops = NULL; - queue->ops_count = 0; - queue->ops_size = 0; + d3d12_command_queue_op_array_init(&queue->op_queue); queue->is_flushing = false;
+ d3d12_command_queue_op_array_init(&queue->aux_op_queue); + if (desc->Priority == D3D12_COMMAND_QUEUE_PRIORITY_GLOBAL_REALTIME) { FIXME("Global realtime priority is not implemented.\n"); @@ -6881,8 +6937,10 @@ VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue) struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(queue); VkQueue vk_queue = vkd3d_queue_acquire(d3d12_queue->vkd3d_queue);
- if (d3d12_queue->ops_count) - WARN("Acquired command queue %p with %zu remaining ops.\n", d3d12_queue, d3d12_queue->ops_count); + if (d3d12_queue->op_queue.count) + WARN("Acquired command queue %p with %zu remaining ops.\n", d3d12_queue, d3d12_queue->op_queue.count); + else if (d3d12_queue->is_flushing) + WARN("Acquired command queue %p which is flushing.\n", d3d12_queue);
return vk_queue; } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 56677648..075b67c2 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1370,6 +1370,13 @@ struct vkd3d_cs_op_data } u; };
+struct d3d12_command_queue_op_array +{ + struct vkd3d_cs_op_data *ops; + size_t count; + size_t size; +}; + /* ID3D12CommandQueue */ struct d3d12_command_queue { @@ -1387,11 +1394,16 @@ struct d3d12_command_queue struct d3d12_device *device;
struct vkd3d_mutex op_mutex; - struct vkd3d_cs_op_data *ops; - size_t ops_count; - size_t ops_size; + + /* These fields are protected by op_mutex. */ + struct d3d12_command_queue_op_array op_queue; bool is_flushing;
+ /* This field is not protected by op_mutex, but can only be used + * by the thread that set is_flushing; when is_flushing is not + * set, aux_op_queue.count must be zero. */ + struct d3d12_command_queue_op_array aux_op_queue; + struct vkd3d_private_store private_store; };
This merge request was approved by Conor McCarthy.
From patch 2/4:
@@ -636,26 +636,17 @@ static HRESULT d3d12_fence_update_pending_value(struct d3d12_fence *fence) return S_OK; } -static HRESULT d3d12_device_add_blocked_command_queues(struct d3d12_device *device, - struct d3d12_command_queue * const *command_queues, unsigned int count) +static HRESULT d3d12_command_queue_record_as_blocked(struct d3d12_command_queue *command_queue) { + struct d3d12_device *device = command_queue->device; HRESULT hr = S_OK; - unsigned int i; - - if (count == 0) - return S_OK; vkd3d_mutex_lock(&device->blocked_queues_mutex); - if ((i = ARRAY_SIZE(device->blocked_queues) - device->blocked_queue_count) < count) - { - FIXME("Failed to add %u blocked command queue(s) to device %p.\n", count - i, device); - count = i; - hr = E_FAIL; - } - - for (i = 0; i < count; ++i) - device->blocked_queues[device->blocked_queue_count++] = command_queues[i]; + if (device->blocked_queue_count < ARRAY_SIZE(device->blocked_queues)) + device->blocked_queues[device->blocked_queue_count++] = command_queue; + else + WARN("Failed to add blocked command queue %p to device %p.\n", command_queue, device); vkd3d_mutex_unlock(&device->blocked_queues_mutex); return hr;
This drops setting hr to E_FAIL when adding the queue fails. If that's in fact intentional, there's no longer much of a point in returning a HRESULT here; it'll always be S_OK.
From patch 3/4:
+static HRESULT d3d12_command_queue_enqueue_op(struct d3d12_command_queue *queue, const struct vkd3d_cs_op_data *op) +{ + struct vkd3d_cs_op_data *new_op; + bool flushed_any = false; + + vkd3d_mutex_lock(&queue->op_mutex); + + if (!(new_op = d3d12_command_queue_require_space_locked(queue))) + { + vkd3d_mutex_unlock(&queue->op_mutex); + return E_OUTOFMEMORY; + } + + *new_op = *op; + + if (queue->ops_count == 1) + return d3d12_command_queue_flush_ops_locked(queue, &flushed_any); + + vkd3d_mutex_unlock(&queue->op_mutex); + return S_OK; +}
I don't like that d3d12_command_queue_enqueue_op() introduces an extra copy of "op". It's not the end of the world, but it seems unnecessary. The introduction of the helper also ends up obfuscating what's supposed to be the main change in the commit according to the commit message somewhat. (I.e., the removal of the direct paths in d3d12_command_queue_ExecuteCommandLists(), d3d12_command_queue_Signal(), and d3d12_command_queue_Wait(). Which could have been three separate commits, I suppose.)
@@ -6783,6 +6736,17 @@ static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, vkd3d_mutex_lock(&queue->op_mutex); + return d3d12_command_queue_flush_ops_locked(queue, flushed_any); +} + +/* flushed_any is initialised by the caller. */ +static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any) +{ + struct vkd3d_cs_op_data *op; + struct d3d12_fence *fence; + HRESULT hr = S_OK; + unsigned int i; + /* Currently only required for d3d12_command_queue_signal(), but set it here anyway. */ queue->is_flushing = true;
It's perhaps not terribly obvious from the diff, but this moves releasing queue->op_mutex to d3d12_command_queue_flush_ops_locked(). I don't think that's desirable, and with the somewhat unfortunate exceptions of d3d12_command_queue_wait_locked() and d3d12_command_queue_wait_binary_semaphore_locked(), that's not what _locked means in vkd3d.
I haven't reviewed 4/4 in detail yet, though it doesn't look wrong at first sight.