From: Conor McCarthy cmccarthy@codeweavers.com
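The D3D12 spec guarantees that lists submitted in ExecuteCommandLists() will complete execution before any subsequent commands begin execution. To reproduce this behaviour on Vulkan, append a reusable full-barrier command buffer to every submission.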
Based on a vkd3d-proton patch by Hans-Kristian Arntzen.
---
 libs/vkd3d/command.c       | 61 ++++++++++++++++++++++++++++++++++++--
 libs/vkd3d/vkd3d_private.h |  3 ++
 2 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index e5ead7d3..ff96ef52 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -33,7 +33,13 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkCommandBufferAllocateInfo allocate_info; + VkCommandPoolCreateInfo pool_create_info; + VkCommandBufferBeginInfo begin_info; + VkMemoryBarrier memory_barrier; struct vkd3d_queue *object; + VkResult vr; + HRESULT hr;
if (!(object = vkd3d_malloc(sizeof(*object)))) return E_OUTOFMEMORY; @@ -55,11 +61,55 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device,
VK_CALL(vkGetDeviceQueue(device->vk_device, family_index, 0, &object->vk_queue));
+ /* Create a reusable full barrier command buffer. This is used in submissions + * to reproduce the guaranteed serialised behavior of D3D12 queues. */ + pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + pool_create_info.pNext = NULL; + pool_create_info.flags = 0; + pool_create_info.queueFamilyIndex = family_index; + if ((vr = VK_CALL(vkCreateCommandPool(device->vk_device, &pool_create_info, NULL, &object->barrier_pool))) < 0) + goto fail_destroy_mutex; + + allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocate_info.pNext = NULL; + allocate_info.commandPool = object->barrier_pool; + allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocate_info.commandBufferCount = 1; + if ((vr = VK_CALL(vkAllocateCommandBuffers(device->vk_device, &allocate_info, + &object->barrier_command_buffer))) < 0) + goto fail_free_command_pool; + + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.pNext = NULL; + /* Allow simultaneous use of this command buffer. */ + begin_info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + begin_info.pInheritanceInfo = NULL; + if ((vr = VK_CALL(vkBeginCommandBuffer(object->barrier_command_buffer, &begin_info))) < 0) + goto fail_free_command_pool; + + /* To avoid unnecessary tracking, just emit a host barrier on every submit. */ + memory_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + memory_barrier.pNext = NULL; + memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + memory_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_HOST_READ_BIT; + VK_CALL(vkCmdPipelineBarrier(object->barrier_command_buffer, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT | VK_PIPELINE_STAGE_HOST_BIT, 0, + 1, &memory_barrier, 0, NULL, 0, NULL)); + if ((vr = VK_CALL(vkEndCommandBuffer(object->barrier_command_buffer))) < 0) + goto fail_free_command_pool; + TRACE("Created queue %p for queue family index %u.\n", object, family_index);
*queue = object;
return S_OK; + +fail_free_command_pool: + VK_CALL(vkDestroyCommandPool(device->vk_device, object->barrier_pool, NULL)); +fail_destroy_mutex: + vkd3d_mutex_destroy(&object->mutex); + return hresult_from_vk_result(vr); }
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) @@ -80,6 +130,8 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) VK_CALL(vkDestroySemaphore(device->vk_device, queue->old_vk_semaphores[i], NULL)); }
+ VK_CALL(vkDestroyCommandPool(device->vk_device, queue->barrier_pool, NULL)); + vkd3d_mutex_unlock(&queue->mutex);
vkd3d_mutex_destroy(&queue->mutex); @@ -6181,7 +6233,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm if (!command_list_count) return;
- if (!(buffers = vkd3d_calloc(command_list_count, sizeof(*buffers)))) + if (!(buffers = vkd3d_calloc(command_list_count + 1, sizeof(*buffers)))) { ERR("Failed to allocate command buffer array.\n"); return; @@ -6202,6 +6254,11 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm buffers[i] = cmd_list->vk_command_buffer; }
+ /* The lists submitted in a call to ExecuteCommandLists() are guaranteed to complete + * before execution begins of the next command submitted to the queue. Append a full + * GPU barrier between submissions. This command buffer has SIMULTANEOUS_BIT. */ + buffers[i++] = command_queue->vkd3d_queue->barrier_command_buffer; + vkd3d_mutex_lock(&command_queue->op_mutex);
if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) @@ -6211,7 +6268,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm } op->opcode = VKD3D_CS_OP_EXECUTE; op->u.execute.buffers = buffers; - op->u.execute.buffer_count = command_list_count; + op->u.execute.buffer_count = i;
d3d12_command_queue_submit_locked(command_queue);
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 1a277a47..a757f9c4 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1327,6 +1327,9 @@ struct vkd3d_queue size_t semaphore_count;
VkSemaphore old_vk_semaphores[VKD3D_MAX_VK_SYNC_OBJECTS]; + + VkCommandPool barrier_pool; + VkCommandBuffer barrier_command_buffer; };
VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue);