From: Derek Lesho dereklesho52@Gmail.com
Signed-off-by: Derek Lesho dereklesho52@Gmail.com --- libs/vkd3d/command.c | 82 ++++++++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 7 ++-- 2 files changed, 47 insertions(+), 42 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index ae88910..b98cc5d 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -45,9 +45,11 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, object->vk_queue_flags = properties->queueFlags; object->timestamp_bits = properties->timestampValidBits;
- object->semaphores = NULL; + object->semaphores.vk_semaphores = NULL; + object->semaphores.sequence_numbers = NULL; object->semaphores_size = 0; object->semaphore_count = 0; + object->pending_semaphore_index = -1;
memset(object->old_vk_semaphores, 0, sizeof(object->old_vk_semaphores));
@@ -70,9 +72,10 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) ERR("Failed to lock mutex, error %d.\n", rc);
for (i = 0; i < queue->semaphore_count; ++i) - VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores[i].vk_semaphore, NULL)); + VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores.vk_semaphores[i], NULL));
- vkd3d_free(queue->semaphores); + vkd3d_free(queue->semaphores.vk_semaphores); + vkd3d_free(queue->semaphores.sequence_numbers);
for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i) { @@ -157,10 +160,10 @@ static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue,
for (i = 0; i < queue->semaphore_count; ++i) { - if (queue->semaphores[i].sequence_number > queue->completed_sequence_number) + if (queue->semaphores.sequence_numbers[i] > queue->completed_sequence_number) break;
- vk_semaphore = queue->semaphores[i].vk_semaphore; + vk_semaphore = queue->semaphores.vk_semaphores[i];
/* Try to store the Vulkan semaphore for reuse. */ for (j = 0; j < ARRAY_SIZE(queue->old_vk_semaphores); ++j) @@ -182,7 +185,8 @@ static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue, if (i > 0) { queue->semaphore_count -= i; - memmove(queue->semaphores, &queue->semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores)); + memmove(queue->semaphores.vk_semaphores, &queue->semaphores.vk_semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores.vk_semaphores)); + memmove(queue->semaphores.sequence_numbers, &queue->semaphores.sequence_numbers[i], queue->semaphore_count * sizeof(*queue->semaphores.sequence_numbers)); }
if (destroyed_semaphore_count) @@ -201,7 +205,7 @@ static uint64_t vkd3d_queue_reset_sequence_number_locked(struct vkd3d_queue *que queue->submitted_sequence_number = 1;
for (i = 0; i < queue->semaphore_count; ++i) - queue->semaphores[i].sequence_number = queue->submitted_sequence_number; + queue->semaphores.sequence_numbers[i] = queue->submitted_sequence_number;
return queue->submitted_sequence_number; } @@ -5511,6 +5515,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_command_list *cmd_list; @@ -5556,6 +5561,15 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm submit_desc.signalSemaphoreCount = 0; submit_desc.pSignalSemaphores = NULL;
+ if (command_queue->vkd3d_queue->pending_semaphore_index != -1) + { + submit_desc.waitSemaphoreCount = command_queue->vkd3d_queue->semaphore_count - command_queue->vkd3d_queue->pending_semaphore_index; + submit_desc.pWaitSemaphores = &command_queue->vkd3d_queue->semaphores.vk_semaphores[command_queue->vkd3d_queue->pending_semaphore_index]; + submit_desc.pWaitDstStageMask = &wait_stage_mask; + + command_queue->vkd3d_queue->pending_semaphore_index = -1; + } + if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue))) { ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue); @@ -5593,6 +5607,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; VkSemaphore vk_semaphore = VK_NULL_HANDLE; @@ -5643,6 +5658,15 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0; submit_info.pSignalSemaphores = &vk_semaphore;
+ if (command_queue->vkd3d_queue->pending_semaphore_index != -1) + { + submit_info.waitSemaphoreCount = command_queue->vkd3d_queue->semaphore_count - command_queue->vkd3d_queue->pending_semaphore_index; + submit_info.pWaitSemaphores = &command_queue->vkd3d_queue->semaphores.vk_semaphores[command_queue->vkd3d_queue->pending_semaphore_index]; + submit_info.pWaitDstStageMask = &wait_stage_mask; + + command_queue->vkd3d_queue->pending_semaphore_index = -1; + } + if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0) { sequence_number = ++vkd3d_queue->submitted_sequence_number; @@ -5702,21 +5726,16 @@ fail: static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { - static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); - const struct vkd3d_vk_device_procs *vk_procs; struct vkd3d_signaled_semaphore *semaphore; uint64_t completed_value = 0; struct vkd3d_queue *queue; struct d3d12_fence *fence; - VkSubmitInfo submit_info; VkQueue vk_queue; - VkResult vr; HRESULT hr;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
- vk_procs = &command_queue->device->vk_procs; queue = command_queue->vkd3d_queue;
fence = unsafe_impl_from_ID3D12Fence(fence_iface); @@ -5752,18 +5771,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if return S_OK; }
- submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = NULL; - submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = &semaphore->vk_semaphore; - submit_info.pWaitDstStageMask = &wait_stage_mask; - submit_info.commandBufferCount = 0; - submit_info.pCommandBuffers = NULL; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = NULL; - - if (!vkd3d_array_reserve((void **)&queue->semaphores, &queue->semaphores_size, - queue->semaphore_count + 1, sizeof(*queue->semaphores))) + if (!vkd3d_array_reserve((void **)&queue->semaphores.vk_semaphores, &queue->semaphores_size, + queue->semaphore_count + 1, sizeof(*queue->semaphores.vk_semaphores)) + || !vkd3d_array_reserve((void **)&queue->semaphores.sequence_numbers, &queue->semaphores_size, + queue->semaphore_count + 1, sizeof(*queue->semaphores.sequence_numbers))) { ERR("Failed to allocate memory for semaphore.\n"); vkd3d_queue_release(queue); @@ -5771,24 +5782,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if goto fail; }
- if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE))) >= 0) - { - queue->semaphores[queue->semaphore_count].vk_semaphore = semaphore->vk_semaphore; - queue->semaphores[queue->semaphore_count].sequence_number = queue->submitted_sequence_number + 1; - ++queue->semaphore_count; + queue->semaphores.vk_semaphores[queue->semaphore_count] = semaphore->vk_semaphore; + queue->semaphores.sequence_numbers[queue->semaphore_count] = queue->submitted_sequence_number + 1;
- command_queue->last_waited_fence = fence; - command_queue->last_waited_fence_value = value; - } + if (queue->pending_semaphore_index == -1) queue->pending_semaphore_index = queue->semaphore_count;
- vkd3d_queue_release(queue); + ++queue->semaphore_count;
- if (vr < 0) - { - WARN("Failed to submit wait operation, vr %d.\n", vr); - hr = hresult_from_vk_result(vr); - goto fail; - } + command_queue->last_waited_fence = fence; + command_queue->last_waited_fence_value = value; + + vkd3d_queue_release(queue);
d3d12_fence_remove_vk_semaphore(fence, semaphore); return S_OK; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 294e677..5a705bf 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -954,11 +954,12 @@ struct vkd3d_queue
struct { - VkSemaphore vk_semaphore; - uint64_t sequence_number; - } *semaphores; + VkSemaphore *vk_semaphores; + uint64_t *sequence_numbers; + } semaphores; size_t semaphores_size; size_t semaphore_count; + uint64_t pending_semaphore_index;
VkSemaphore old_vk_semaphores[VKD3D_MAX_VK_SYNC_OBJECTS]; };
On Tue, 10 Sep 2019 at 07:48, Derek Lesho dlesho@codeweavers.com wrote:
From: Derek Lesho dereklesho52@Gmail.com
Signed-off-by: Derek Lesho dereklesho52@Gmail.com
libs/vkd3d/command.c | 82 ++++++++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 7 ++-- 2 files changed, 47 insertions(+), 42 deletions(-)
That's not a very helpful commit message. What problem are we trying to solve, and why is this the best way to solve it?
Henri, it is my perception that vkQueueSubmit calls are expensive and that batching them up decreases load on the CPU. If we are uncertain of that, I'll try to do some testing to confirm.
On Tue, Sep 10, 2019 at 6:56 AM Henri Verbeet hverbeet@gmail.com wrote:
On Tue, 10 Sep 2019 at 07:48, Derek Lesho dlesho@codeweavers.com wrote:
From: Derek Lesho dereklesho52@Gmail.com
Signed-off-by: Derek Lesho dereklesho52@Gmail.com
libs/vkd3d/command.c | 82 ++++++++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 7 ++-- 2 files changed, 47 insertions(+), 42 deletions(-)
That's not a very helpful commit message. What problem are we trying to solve, and why is this the best way to solve it?
On Tue, 10 Sep 2019 at 18:29, Derek Lesho dereklesho52@gmail.com wrote:
Henri, it is my perception that vkQueueSubmit calls are expensive and batching them up decreases load on the CPU. If we are uncertain of that I'll try to do some testing to confirm
Ok, so it's a potential performance improvement. In that case, some numbers would be nice, yes. Preferably with a real application.
On 9/10/19 10:49 AM, Henri Verbeet wrote:
On Tue, 10 Sep 2019 at 18:29, Derek Lesho dereklesho52@gmail.com wrote:
Henri, it is my perception that vkQueueSubmit calls are expensive and batching them up decreases load on the CPU. If we are uncertain of that I'll try to do some testing to confirm
Ok, so it's a potential performance improvement. In that case, some numbers would be nice, yes. Preferably with a real application.
Understood, I'll test it with SOTTR (Shadow of the Tomb Raider) once things are performing better.
vkQueueSubmit calls are expensive, so defer waiting on semaphores until we actually queue work that can wait on them.
Signed-off-by: Derek Lesho dlesho@codeweavers.com --- v2: Fix allocation mistake. --- libs/vkd3d/command.c | 83 ++++++++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 7 ++-- 2 files changed, 48 insertions(+), 42 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 9177f66..7778e09 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -45,9 +45,11 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, object->vk_queue_flags = properties->queueFlags; object->timestamp_bits = properties->timestampValidBits;
- object->semaphores = NULL; + object->semaphores.vk_semaphores = NULL; + object->semaphores.sequence_numbers = NULL; object->semaphores_size = 0; object->semaphore_count = 0; + object->pending_semaphore_index = -1;
memset(object->old_vk_semaphores, 0, sizeof(object->old_vk_semaphores));
@@ -70,9 +72,10 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) ERR("Failed to lock mutex, error %d.\n", rc);
for (i = 0; i < queue->semaphore_count; ++i) - VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores[i].vk_semaphore, NULL)); + VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores.vk_semaphores[i], NULL));
- vkd3d_free(queue->semaphores); + vkd3d_free(queue->semaphores.vk_semaphores); + vkd3d_free(queue->semaphores.sequence_numbers);
for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i) { @@ -157,10 +160,10 @@ static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue,
for (i = 0; i < queue->semaphore_count; ++i) { - if (queue->semaphores[i].sequence_number > queue->completed_sequence_number) + if (queue->semaphores.sequence_numbers[i] > queue->completed_sequence_number) break;
- vk_semaphore = queue->semaphores[i].vk_semaphore; + vk_semaphore = queue->semaphores.vk_semaphores[i];
/* Try to store the Vulkan semaphore for reuse. */ for (j = 0; j < ARRAY_SIZE(queue->old_vk_semaphores); ++j) @@ -182,7 +185,8 @@ static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue, if (i > 0) { queue->semaphore_count -= i; - memmove(queue->semaphores, &queue->semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores)); + memmove(queue->semaphores.vk_semaphores, &queue->semaphores.vk_semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores.vk_semaphores)); + memmove(queue->semaphores.sequence_numbers, &queue->semaphores.sequence_numbers[i], queue->semaphore_count * sizeof(*queue->semaphores.sequence_numbers)); }
if (destroyed_semaphore_count) @@ -201,7 +205,7 @@ static uint64_t vkd3d_queue_reset_sequence_number_locked(struct vkd3d_queue *que queue->submitted_sequence_number = 1;
for (i = 0; i < queue->semaphore_count; ++i) - queue->semaphores[i].sequence_number = queue->submitted_sequence_number; + queue->semaphores.sequence_numbers[i] = queue->submitted_sequence_number;
return queue->submitted_sequence_number; } @@ -5513,6 +5517,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_command_list *cmd_list; @@ -5558,6 +5563,15 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm submit_desc.signalSemaphoreCount = 0; submit_desc.pSignalSemaphores = NULL;
+ if (command_queue->vkd3d_queue->pending_semaphore_index != -1) + { + submit_desc.waitSemaphoreCount = command_queue->vkd3d_queue->semaphore_count - command_queue->vkd3d_queue->pending_semaphore_index; + submit_desc.pWaitSemaphores = &command_queue->vkd3d_queue->semaphores.vk_semaphores[command_queue->vkd3d_queue->pending_semaphore_index]; + submit_desc.pWaitDstStageMask = &wait_stage_mask; + + command_queue->vkd3d_queue->pending_semaphore_index = -1; + } + if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue))) { ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue); @@ -5595,6 +5609,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; VkSemaphore vk_semaphore = VK_NULL_HANDLE; @@ -5645,6 +5660,15 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0; submit_info.pSignalSemaphores = &vk_semaphore;
+ if (command_queue->vkd3d_queue->pending_semaphore_index != -1) + { + submit_info.waitSemaphoreCount = command_queue->vkd3d_queue->semaphore_count - command_queue->vkd3d_queue->pending_semaphore_index; + submit_info.pWaitSemaphores = &command_queue->vkd3d_queue->semaphores.vk_semaphores[command_queue->vkd3d_queue->pending_semaphore_index]; + submit_info.pWaitDstStageMask = &wait_stage_mask; + + command_queue->vkd3d_queue->pending_semaphore_index = -1; + } + if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0) { sequence_number = ++vkd3d_queue->submitted_sequence_number; @@ -5704,21 +5728,16 @@ fail: static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { - static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); - const struct vkd3d_vk_device_procs *vk_procs; struct vkd3d_signaled_semaphore *semaphore; uint64_t completed_value = 0; struct vkd3d_queue *queue; struct d3d12_fence *fence; - VkSubmitInfo submit_info; VkQueue vk_queue; - VkResult vr; HRESULT hr;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
- vk_procs = &command_queue->device->vk_procs; queue = command_queue->vkd3d_queue;
fence = unsafe_impl_from_ID3D12Fence(fence_iface); @@ -5754,18 +5773,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if return S_OK; }
- submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = NULL; - submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = &semaphore->vk_semaphore; - submit_info.pWaitDstStageMask = &wait_stage_mask; - submit_info.commandBufferCount = 0; - submit_info.pCommandBuffers = NULL; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = NULL; - - if (!vkd3d_array_reserve((void **)&queue->semaphores, &queue->semaphores_size, - queue->semaphore_count + 1, sizeof(*queue->semaphores))) + if (!vkd3d_array_reserve((void **)&queue->semaphores.vk_semaphores, &queue->semaphores_size, + queue->semaphore_count + 1, sizeof(*queue->semaphores.vk_semaphores)) + || (--queue->semaphores_size) + 1 + || !vkd3d_array_reserve((void **)&queue->semaphores.sequence_numbers, &queue->semaphores_size, + queue->semaphore_count + 1, sizeof(*queue->semaphores.sequence_numbers))) { ERR("Failed to allocate memory for semaphore.\n"); vkd3d_queue_release(queue); @@ -5773,24 +5785,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if goto fail; }
- if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE))) >= 0) - { - queue->semaphores[queue->semaphore_count].vk_semaphore = semaphore->vk_semaphore; - queue->semaphores[queue->semaphore_count].sequence_number = queue->submitted_sequence_number + 1; - ++queue->semaphore_count; + queue->semaphores.vk_semaphores[queue->semaphore_count] = semaphore->vk_semaphore; + queue->semaphores.sequence_numbers[queue->semaphore_count] = queue->submitted_sequence_number + 1;
- command_queue->last_waited_fence = fence; - command_queue->last_waited_fence_value = value; - } + if (queue->pending_semaphore_index == -1) queue->pending_semaphore_index = queue->semaphore_count;
- vkd3d_queue_release(queue); + ++queue->semaphore_count;
- if (vr < 0) - { - WARN("Failed to submit wait operation, vr %d.\n", vr); - hr = hresult_from_vk_result(vr); - goto fail; - } + command_queue->last_waited_fence = fence; + command_queue->last_waited_fence_value = value; + + vkd3d_queue_release(queue);
d3d12_fence_remove_vk_semaphore(fence, semaphore); return S_OK; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 294e677..5a705bf 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -954,11 +954,12 @@ struct vkd3d_queue
struct { - VkSemaphore vk_semaphore; - uint64_t sequence_number; - } *semaphores; + VkSemaphore *vk_semaphores; + uint64_t *sequence_numbers; + } semaphores; size_t semaphores_size; size_t semaphore_count; + uint64_t pending_semaphore_index;
VkSemaphore old_vk_semaphores[VKD3D_MAX_VK_SYNC_OBJECTS]; };
Signed-off-by: Derek Lesho dlesho@codeweavers.com --- v3: Stores the size of the vulkan semaphore array and sequence number array separately. --- libs/vkd3d/command.c | 85 ++++++++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 10 +++-- 2 files changed, 51 insertions(+), 44 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index ae88910..844d0ba 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -45,9 +45,12 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, object->vk_queue_flags = properties->queueFlags; object->timestamp_bits = properties->timestampValidBits;
- object->semaphores = NULL; - object->semaphores_size = 0; + object->semaphores.vk_semaphores = NULL; + object->semaphores.sequence_numbers = NULL; + object->vk_semaphores_size = 0; + object->seq_numbers_size = 0; object->semaphore_count = 0; + object->pending_semaphore_index = -1;
memset(object->old_vk_semaphores, 0, sizeof(object->old_vk_semaphores));
@@ -70,9 +73,10 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) ERR("Failed to lock mutex, error %d.\n", rc);
for (i = 0; i < queue->semaphore_count; ++i) - VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores[i].vk_semaphore, NULL)); + VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores.vk_semaphores[i], NULL));
- vkd3d_free(queue->semaphores); + vkd3d_free(queue->semaphores.vk_semaphores); + vkd3d_free(queue->semaphores.sequence_numbers);
for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i) { @@ -157,10 +161,10 @@ static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue,
for (i = 0; i < queue->semaphore_count; ++i) { - if (queue->semaphores[i].sequence_number > queue->completed_sequence_number) + if (queue->semaphores.sequence_numbers[i] > queue->completed_sequence_number) break;
- vk_semaphore = queue->semaphores[i].vk_semaphore; + vk_semaphore = queue->semaphores.vk_semaphores[i];
/* Try to store the Vulkan semaphore for reuse. */ for (j = 0; j < ARRAY_SIZE(queue->old_vk_semaphores); ++j) @@ -182,7 +186,8 @@ static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue, if (i > 0) { queue->semaphore_count -= i; - memmove(queue->semaphores, &queue->semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores)); + memmove(queue->semaphores.vk_semaphores, &queue->semaphores.vk_semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores.vk_semaphores)); + memmove(queue->semaphores.sequence_numbers, &queue->semaphores.sequence_numbers[i], queue->semaphore_count * sizeof(*queue->semaphores.sequence_numbers)); }
if (destroyed_semaphore_count) @@ -201,7 +206,7 @@ static uint64_t vkd3d_queue_reset_sequence_number_locked(struct vkd3d_queue *que queue->submitted_sequence_number = 1;
for (i = 0; i < queue->semaphore_count; ++i) - queue->semaphores[i].sequence_number = queue->submitted_sequence_number; + queue->semaphores.sequence_numbers[i] = queue->submitted_sequence_number;
return queue->submitted_sequence_number; } @@ -5511,6 +5516,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_command_list *cmd_list; @@ -5556,6 +5562,15 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm submit_desc.signalSemaphoreCount = 0; submit_desc.pSignalSemaphores = NULL;
+ if (command_queue->vkd3d_queue->pending_semaphore_index != -1) + { + submit_desc.waitSemaphoreCount = command_queue->vkd3d_queue->semaphore_count - command_queue->vkd3d_queue->pending_semaphore_index; + submit_desc.pWaitSemaphores = &command_queue->vkd3d_queue->semaphores.vk_semaphores[command_queue->vkd3d_queue->pending_semaphore_index]; + submit_desc.pWaitDstStageMask = &wait_stage_mask; + + command_queue->vkd3d_queue->pending_semaphore_index = -1; + } + if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue))) { ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue); @@ -5593,6 +5608,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; VkSemaphore vk_semaphore = VK_NULL_HANDLE; @@ -5643,6 +5659,15 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0; submit_info.pSignalSemaphores = &vk_semaphore;
+ if (command_queue->vkd3d_queue->pending_semaphore_index != -1) + { + submit_info.waitSemaphoreCount = command_queue->vkd3d_queue->semaphore_count - command_queue->vkd3d_queue->pending_semaphore_index; + submit_info.pWaitSemaphores = &command_queue->vkd3d_queue->semaphores.vk_semaphores[command_queue->vkd3d_queue->pending_semaphore_index]; + submit_info.pWaitDstStageMask = &wait_stage_mask; + + command_queue->vkd3d_queue->pending_semaphore_index = -1; + } + if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0) { sequence_number = ++vkd3d_queue->submitted_sequence_number; @@ -5702,21 +5727,16 @@ fail: static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { - static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); - const struct vkd3d_vk_device_procs *vk_procs; struct vkd3d_signaled_semaphore *semaphore; uint64_t completed_value = 0; struct vkd3d_queue *queue; struct d3d12_fence *fence; - VkSubmitInfo submit_info; VkQueue vk_queue; - VkResult vr; HRESULT hr;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
- vk_procs = &command_queue->device->vk_procs; queue = command_queue->vkd3d_queue;
fence = unsafe_impl_from_ID3D12Fence(fence_iface); @@ -5752,18 +5772,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if return S_OK; }
- submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = NULL; - submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = &semaphore->vk_semaphore; - submit_info.pWaitDstStageMask = &wait_stage_mask; - submit_info.commandBufferCount = 0; - submit_info.pCommandBuffers = NULL; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = NULL; - - if (!vkd3d_array_reserve((void **)&queue->semaphores, &queue->semaphores_size, - queue->semaphore_count + 1, sizeof(*queue->semaphores))) + if (!vkd3d_array_reserve((void **)&queue->semaphores.vk_semaphores, &queue->vk_semaphores_size, + queue->semaphore_count + 1, sizeof(*queue->semaphores.vk_semaphores)) + || !vkd3d_array_reserve((void **)&queue->semaphores.sequence_numbers, &queue->seq_numbers_size, + queue->semaphore_count + 1, sizeof(*queue->semaphores.sequence_numbers))) { ERR("Failed to allocate memory for semaphore.\n"); vkd3d_queue_release(queue); @@ -5771,24 +5783,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if goto fail; }
- if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE))) >= 0) - { - queue->semaphores[queue->semaphore_count].vk_semaphore = semaphore->vk_semaphore; - queue->semaphores[queue->semaphore_count].sequence_number = queue->submitted_sequence_number + 1; - ++queue->semaphore_count; + queue->semaphores.vk_semaphores[queue->semaphore_count] = semaphore->vk_semaphore; + queue->semaphores.sequence_numbers[queue->semaphore_count] = queue->submitted_sequence_number + 1;
- command_queue->last_waited_fence = fence; - command_queue->last_waited_fence_value = value; - } + if (queue->pending_semaphore_index == -1) queue->pending_semaphore_index = queue->semaphore_count;
- vkd3d_queue_release(queue); + ++queue->semaphore_count;
- if (vr < 0) - { - WARN("Failed to submit wait operation, vr %d.\n", vr); - hr = hresult_from_vk_result(vr); - goto fail; - } + command_queue->last_waited_fence = fence; + command_queue->last_waited_fence_value = value; + + vkd3d_queue_release(queue);
d3d12_fence_remove_vk_semaphore(fence, semaphore); return S_OK; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 294e677..bd9670e 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -954,11 +954,13 @@ struct vkd3d_queue
struct { - VkSemaphore vk_semaphore; - uint64_t sequence_number; - } *semaphores; - size_t semaphores_size; + VkSemaphore *vk_semaphores; + uint64_t *sequence_numbers; + } semaphores; + size_t vk_semaphores_size; + size_t seq_numbers_size; size_t semaphore_count; + uint64_t pending_semaphore_index;
VkSemaphore old_vk_semaphores[VKD3D_MAX_VK_SYNC_OBJECTS]; };
On 9/11/19 2:28 PM, Derek Lesho wrote:
Signed-off-by: Derek Lesho dlesho@codeweavers.com
v3: Stores the sizes of the Vulkan semaphore array and the sequence number array separately.
libs/vkd3d/command.c | 85 ++++++++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 10 +++-- 2 files changed, 51 insertions(+), 44 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index ae88910..844d0ba 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -45,9 +45,12 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, object->vk_queue_flags = properties->queueFlags; object->timestamp_bits = properties->timestampValidBits;
- object->semaphores = NULL;
- object->semaphores_size = 0;
object->semaphores.vk_semaphores = NULL;
object->semaphores.sequence_numbers = NULL;
object->vk_semaphores_size = 0;
object->seq_numbers_size = 0; object->semaphore_count = 0;
object->pending_semaphore_index = -1;
memset(object->old_vk_semaphores, 0, sizeof(object->old_vk_semaphores));
@@ -70,9 +73,10 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) ERR("Failed to lock mutex, error %d.\n", rc);
for (i = 0; i < queue->semaphore_count; ++i)
VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores[i].vk_semaphore, NULL));
VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores.vk_semaphores[i], NULL));
- vkd3d_free(queue->semaphores);
vkd3d_free(queue->semaphores.vk_semaphores);
vkd3d_free(queue->semaphores.sequence_numbers);
for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i) {
@@ -157,10 +161,10 @@ static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue,
for (i = 0; i < queue->semaphore_count; ++i) {
if (queue->semaphores[i].sequence_number > queue->completed_sequence_number)
if (queue->semaphores.sequence_numbers[i] > queue->completed_sequence_number) break;
vk_semaphore = queue->semaphores[i].vk_semaphore;
vk_semaphore = queue->semaphores.vk_semaphores[i]; /* Try to store the Vulkan semaphore for reuse. */ for (j = 0; j < ARRAY_SIZE(queue->old_vk_semaphores); ++j)
@@ -182,7 +186,8 @@ static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue, if (i > 0) { queue->semaphore_count -= i;
memmove(queue->semaphores, &queue->semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores));
memmove(queue->semaphores.vk_semaphores, &queue->semaphores.vk_semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores.vk_semaphores));
memmove(queue->semaphores.sequence_numbers, &queue->semaphores.sequence_numbers[i], queue->semaphore_count * sizeof(*queue->semaphores.sequence_numbers)); } if (destroyed_semaphore_count)
@@ -201,7 +206,7 @@ static uint64_t vkd3d_queue_reset_sequence_number_locked(struct vkd3d_queue *que queue->submitted_sequence_number = 1;
for (i = 0; i < queue->semaphore_count; ++i)
queue->semaphores[i].sequence_number = queue->submitted_sequence_number;
queue->semaphores.sequence_numbers[i] = queue->submitted_sequence_number; return queue->submitted_sequence_number;
}
@@ -5511,6 +5516,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) {
- static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_command_list *cmd_list;
@@ -5556,6 +5562,15 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm submit_desc.signalSemaphoreCount = 0; submit_desc.pSignalSemaphores = NULL;
- if (command_queue->vkd3d_queue->pending_semaphore_index != -1)
- {
submit_desc.waitSemaphoreCount = command_queue->vkd3d_queue->semaphore_count - command_queue->vkd3d_queue->pending_semaphore_index;
submit_desc.pWaitSemaphores = &command_queue->vkd3d_queue->semaphores.vk_semaphores[command_queue->vkd3d_queue->pending_semaphore_index];
submit_desc.pWaitDstStageMask = &wait_stage_mask;
command_queue->vkd3d_queue->pending_semaphore_index = -1;
- }
if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue))) { ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue);
@@ -5593,6 +5608,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) {
- static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; VkSemaphore vk_semaphore = VK_NULL_HANDLE;
@@ -5643,6 +5659,15 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0; submit_info.pSignalSemaphores = &vk_semaphore;
- if (command_queue->vkd3d_queue->pending_semaphore_index != -1)
- {
submit_info.waitSemaphoreCount = command_queue->vkd3d_queue->semaphore_count - command_queue->vkd3d_queue->pending_semaphore_index;
submit_info.pWaitSemaphores = &command_queue->vkd3d_queue->semaphores.vk_semaphores[command_queue->vkd3d_queue->pending_semaphore_index];
submit_info.pWaitDstStageMask = &wait_stage_mask;
command_queue->vkd3d_queue->pending_semaphore_index = -1;
- }
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0) { sequence_number = ++vkd3d_queue->submitted_sequence_number;
@@ -5702,21 +5727,16 @@ fail: static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) {
static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
const struct vkd3d_vk_device_procs *vk_procs; struct vkd3d_signaled_semaphore *semaphore; uint64_t completed_value = 0; struct vkd3d_queue *queue; struct d3d12_fence *fence;
VkSubmitInfo submit_info; VkQueue vk_queue;
VkResult vr; HRESULT hr;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
vk_procs = &command_queue->device->vk_procs; queue = command_queue->vkd3d_queue;
fence = unsafe_impl_from_ID3D12Fence(fence_iface);
@@ -5752,18 +5772,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if return S_OK; }
- submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
- submit_info.pNext = NULL;
- submit_info.waitSemaphoreCount = 1;
- submit_info.pWaitSemaphores = &semaphore->vk_semaphore;
- submit_info.pWaitDstStageMask = &wait_stage_mask;
- submit_info.commandBufferCount = 0;
- submit_info.pCommandBuffers = NULL;
- submit_info.signalSemaphoreCount = 0;
- submit_info.pSignalSemaphores = NULL;
- if (!vkd3d_array_reserve((void **)&queue->semaphores, &queue->semaphores_size,
queue->semaphore_count + 1, sizeof(*queue->semaphores)))
- if (!vkd3d_array_reserve((void **)&queue->semaphores.vk_semaphores, &queue->vk_semaphores_size,
queue->semaphore_count + 1, sizeof(*queue->semaphores.vk_semaphores))
|| !vkd3d_array_reserve((void **)&queue->semaphores.sequence_numbers, &queue->seq_numbers_size,
queue->semaphore_count + 1, sizeof(*queue->semaphores.sequence_numbers))) { ERR("Failed to allocate memory for semaphore.\n"); vkd3d_queue_release(queue);
@@ -5771,24 +5783,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if goto fail; }
- if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE))) >= 0)
- {
queue->semaphores[queue->semaphore_count].vk_semaphore = semaphore->vk_semaphore;
queue->semaphores[queue->semaphore_count].sequence_number = queue->submitted_sequence_number + 1;
++queue->semaphore_count;
- queue->semaphores.vk_semaphores[queue->semaphore_count] = semaphore->vk_semaphore;
- queue->semaphores.sequence_numbers[queue->semaphore_count] = queue->submitted_sequence_number + 1;
command_queue->last_waited_fence = fence;
command_queue->last_waited_fence_value = value;
- }
- if (queue->pending_semaphore_index == -1) queue->pending_semaphore_index = queue->semaphore_count;
- vkd3d_queue_release(queue);
- ++queue->semaphore_count;
- if (vr < 0)
- {
WARN("Failed to submit wait operation, vr %d.\n", vr);
hr = hresult_from_vk_result(vr);
goto fail;
- }
command_queue->last_waited_fence = fence;
command_queue->last_waited_fence_value = value;
vkd3d_queue_release(queue);
d3d12_fence_remove_vk_semaphore(fence, semaphore); return S_OK;
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 294e677..bd9670e 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -954,11 +954,13 @@ struct vkd3d_queue
struct {
VkSemaphore vk_semaphore;
uint64_t sequence_number;
- } *semaphores;
- size_t semaphores_size;
VkSemaphore *vk_semaphores;
uint64_t *sequence_numbers;
} semaphores;
size_t vk_semaphores_size;
size_t seq_numbers_size; size_t semaphore_count;
uint64_t pending_semaphore_index;
VkSemaphore old_vk_semaphores[VKD3D_MAX_VK_SYNC_OBJECTS]; };
*[PATCH v3], not v2, sorry about the typo!
In D3D12 it is legal to concurrently overwrite descriptors, but we currently can't handle that. This patch locks descriptors that share an underlying view so that they cannot be written to at the same time.
Signed-off-by: Derek Lesho dlesho@codeweavers.com --- v4: Instead of locking descriptors and moving lock pointers to the destination descriptors, this version ensures that the minimal number of descriptors are synchronized as a group, by storing the locks in vkd3d_view instead. My initial testing shows this allows us to spend half as much time waiting as in v3, and a fourth as much time waiting as an implementation that locks the whole heap on write. --- libs/vkd3d/resource.c | 307 ++++++++++++++++++++++++++----------- libs/vkd3d/vkd3d_private.h | 5 +- 2 files changed, 219 insertions(+), 93 deletions(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 463f373..a43f6fa 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -1881,6 +1881,7 @@ static struct vkd3d_view *vkd3d_view_create(void) if ((view = vkd3d_malloc(sizeof(*view)))) { view->refcount = 1; + pthread_spin_init(&view->lock, PTHREAD_PROCESS_PRIVATE); view->vk_counter_view = VK_NULL_HANDLE; } return view; @@ -1891,25 +1892,24 @@ void vkd3d_view_incref(struct vkd3d_view *view) InterlockedIncrement(&view->refcount); }
-static void vkd3d_view_decref_descriptor(struct vkd3d_view *view, - const struct d3d12_desc *descriptor, struct d3d12_device *device) +int vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; ULONG refcount = InterlockedDecrement(&view->refcount);
if (refcount) - return; + return true;
TRACE("Destroying view %p.\n", view);
- if (!descriptor) + if (view->type == VKD3D_DESCRIPTOR_MAGIC_RTV || view->type == VKD3D_DESCRIPTOR_MAGIC_DSV) { VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); } - else if (descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_SRV || descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_UAV) + else if (view->type == VKD3D_DESCRIPTOR_MAGIC_SRV || view->type == VKD3D_DESCRIPTOR_MAGIC_UAV) { - if (descriptor->vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - || descriptor->vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (view->vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + || view->vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) VK_CALL(vkDestroyBufferView(device->vk_device, view->u.vk_buffer_view, NULL)); else VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); @@ -1917,17 +1917,16 @@ static void vkd3d_view_decref_descriptor(struct vkd3d_view *view, if (view->vk_counter_view) VK_CALL(vkDestroyBufferView(device->vk_device, view->vk_counter_view, NULL)); } - else if (descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) + else if (view->type == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) { VK_CALL(vkDestroySampler(device->vk_device, view->u.vk_sampler, NULL)); }
- vkd3d_free(view); -} + pthread_spin_unlock(&view->lock); + pthread_spin_destroy(&view->lock);
-void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) -{ - vkd3d_view_decref_descriptor(view, NULL, device); + vkd3d_free(view); + return false; }
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, @@ -1938,19 +1937,41 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, || descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_UAV || descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) { - vkd3d_view_decref_descriptor(descriptor->u.view, descriptor, device); + vkd3d_view_decref(descriptor->u.view, device); } +} + +static struct vkd3d_view *d3d12_desc_grab_view(struct d3d12_desc *desc, struct d3d12_device *device) +{ + pthread_spinlock_t *lock = NULL;
- memset(descriptor, 0, sizeof(*descriptor)); + while (desc->magic && desc->magic != VKD3D_DESCRIPTOR_MAGIC_CBV) + { + if (!(pthread_spin_trylock(lock = &desc->u.view->lock))) + { + /* we successfully acquired the lock */ + vkd3d_view_incref(desc->u.view); + return desc->u.view; + } + + /* if we can't acquire it right away, wait for it then try again w/ the new view pointer */ + pthread_spin_lock(lock); + pthread_spin_unlock(lock); + } + + return NULL; }
void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { + struct vkd3d_view *dst_view; + assert(dst != src);
- d3d12_desc_destroy(dst, device); + dst_view = d3d12_desc_grab_view(dst, device);
+ d3d12_desc_destroy(dst, device); *dst = *src;
if (src->magic == VKD3D_DESCRIPTOR_MAGIC_SRV @@ -1959,6 +1980,10 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, { vkd3d_view_incref(src->u.view); } + + if (dst_view) + if (vkd3d_view_decref(dst_view, device)) + pthread_spin_unlock(&dst_view->lock); }
static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, @@ -2322,22 +2347,22 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, { struct VkDescriptorBufferInfo *buffer_info; struct d3d12_resource *resource; - - d3d12_desc_destroy(descriptor, device); + struct d3d12_desc new_desc = {}; + struct vkd3d_view *prev_view;
if (!desc) { WARN("Constant buffer desc is NULL.\n"); - return; + goto done; }
if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) { WARN("Size is not %u bytes aligned.\n", D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); - return; + goto done; }
- buffer_info = &descriptor->u.vk_cbv_info; + buffer_info = &new_desc.u.vk_cbv_info; if (desc->BufferLocation) { resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); @@ -2353,8 +2378,18 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, buffer_info->range = VKD3D_NULL_BUFFER_SIZE; }
- descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_CBV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + new_desc.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; + new_desc.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + + done: + prev_view = d3d12_desc_grab_view(descriptor, device); + + d3d12_desc_destroy(descriptor, device); + *descriptor = new_desc; + + if (prev_view) + if (vkd3d_view_decref(prev_view, device)) + pthread_spin_unlock(&prev_view->lock); }
static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) @@ -2425,8 +2460,8 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) return;
- descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor->magic = view->type = VKD3D_DESCRIPTOR_MAGIC_SRV; + descriptor->vk_descriptor_type = view->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; descriptor->u.view = view; }
@@ -2455,8 +2490,8 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, desc->u.Buffer.StructureByteStride, flags, &view)) return;
- descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + descriptor->magic = view->type = VKD3D_DESCRIPTOR_MAGIC_SRV; + descriptor->vk_descriptor_type = view->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; descriptor->u.view = view; }
@@ -2465,24 +2500,23 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; - - d3d12_desc_destroy(descriptor, device); + struct vkd3d_view *view, *prev_view; + struct d3d12_desc new_desc = {};
if (!resource) { - vkd3d_create_null_srv(descriptor, device, desc); - return; + vkd3d_create_null_srv(&new_desc, device, desc); + goto done; }
if (d3d12_resource_is_buffer(resource)) { - vkd3d_create_buffer_srv(descriptor, device, resource, desc); - return; + vkd3d_create_buffer_srv(&new_desc, device, resource, desc); + goto done; }
if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) - return; + goto done;
vkd3d_desc.miplevel_count = VK_REMAINING_MIP_LEVELS; vkd3d_desc.allowed_swizzle = true; @@ -2559,11 +2593,21 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, }
if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; + goto done;
- descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->u.view = view; + new_desc.magic = view->type = VKD3D_DESCRIPTOR_MAGIC_SRV; + new_desc.vk_descriptor_type = view->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + new_desc.u.view = view; + + done: + prev_view = d3d12_desc_grab_view(descriptor, device); + + d3d12_desc_destroy(descriptor, device); + *descriptor = new_desc; + + if (prev_view) + if (vkd3d_view_decref(prev_view, device)) + pthread_spin_unlock(&prev_view->lock); }
static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) @@ -2634,8 +2678,8 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) return;
- descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + descriptor->magic = view->type = VKD3D_DESCRIPTOR_MAGIC_UAV; + descriptor->vk_descriptor_type = view->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; descriptor->u.view = view; }
@@ -2667,8 +2711,8 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ desc->u.Buffer.StructureByteStride, flags, &view)) return;
- descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + descriptor->magic = view->type = VKD3D_DESCRIPTOR_MAGIC_UAV; + descriptor->vk_descriptor_type = view->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; descriptor->u.view = view;
if (counter_resource) @@ -2685,6 +2729,7 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ WARN("Failed to create counter buffer view.\n"); view->vk_counter_view = VK_NULL_HANDLE; d3d12_desc_destroy(descriptor, device); + memset(descriptor, 0, sizeof(*descriptor)); } }
@@ -2747,8 +2792,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) return;
- descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + descriptor->magic = view->type = VKD3D_DESCRIPTOR_MAGIC_UAV; + descriptor->vk_descriptor_type = view->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; descriptor->u.view = view;
descriptor->uav.texture.vk_aspect_mask = vkd3d_desc.format->vk_aspect_mask; @@ -2761,26 +2806,37 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d struct d3d12_resource *resource, struct d3d12_resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { - d3d12_desc_destroy(descriptor, device); + struct d3d12_desc new_desc = {}; + struct vkd3d_view *prev_view;
if (!resource) { if (counter_resource) FIXME("Ignoring counter resource %p.\n", counter_resource); - vkd3d_create_null_uav(descriptor, device, desc); - return; + vkd3d_create_null_uav(&new_desc, device, desc); + goto done; }
if (d3d12_resource_is_buffer(resource)) { - vkd3d_create_buffer_uav(descriptor, device, resource, counter_resource, desc); + vkd3d_create_buffer_uav(&new_desc, device, resource, counter_resource, desc); } else { if (counter_resource) FIXME("Unexpected counter resource for texture view.\n"); - vkd3d_create_texture_uav(descriptor, device, resource, desc); + vkd3d_create_texture_uav(&new_desc, device, resource, desc); } + + done: + prev_view = d3d12_desc_grab_view(descriptor, device); + + d3d12_desc_destroy(descriptor, device); + *descriptor = new_desc; + + if (prev_view) + if (vkd3d_view_decref(prev_view, device)) + pthread_spin_unlock(&prev_view->lock); }
bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, @@ -2887,14 +2943,13 @@ static VkResult d3d12_create_sampler(struct d3d12_device *device, D3D12_FILTER f void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device *device, const D3D12_SAMPLER_DESC *desc) { - struct vkd3d_view *view; - - d3d12_desc_destroy(sampler, device); + struct vkd3d_view *view, *prev_view; + struct d3d12_desc new_sampler = {};
if (!desc) { WARN("NULL sampler desc.\n"); - return; + goto done; }
if (desc->AddressU == D3D12_TEXTURE_ADDRESS_MODE_BORDER @@ -2904,19 +2959,29 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]);
if (!(view = vkd3d_view_create())) - return; + goto done;
if (d3d12_create_sampler(device, desc->Filter, desc->AddressU, desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy, desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->u.vk_sampler) < 0) { vkd3d_free(view); - return; + goto done; }
- sampler->magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; - sampler->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; - sampler->u.view = view; + new_sampler.magic = view->type = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; + new_sampler.vk_descriptor_type = view->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; + new_sampler.u.view = view; + + done: + prev_view = d3d12_desc_grab_view(sampler, device); + + d3d12_desc_destroy(sampler, device); + *sampler = new_sampler; + + if (prev_view) + if (vkd3d_view_decref(prev_view, device)) + pthread_spin_unlock(&prev_view->lock); }
HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, @@ -2942,30 +3007,49 @@ static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_devi return;
vkd3d_view_decref(rtv->view, device); - memset(rtv, 0, sizeof(*rtv)); +} + +static struct vkd3d_view *d3d12_rtv_desc_grab_view(struct d3d12_rtv_desc *rtv_desc, struct d3d12_device *device) +{ + pthread_spinlock_t *lock; + + while (rtv_desc->view) + { + if (!(pthread_spin_trylock(lock = &rtv_desc->view->lock))) + { + /* we successfully picked up the lock */ + vkd3d_view_incref(rtv_desc->view); + return rtv_desc->view; + } + + /* if we can't acquire it right away, wait for it then try again w/ the new view pointer */ + pthread_spin_lock(lock); + pthread_spin_unlock(lock); + } + + return NULL; }
void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; - - d3d12_rtv_desc_destroy(rtv_desc, device); + struct vkd3d_view *view, *prev_view; + struct d3d12_rtv_desc new_rtv_desc = {};
if (!resource) { FIXME("NULL resource RTV not implemented.\n"); - return; + goto done; }
if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) - return; + goto done;
if (vkd3d_desc.format->vk_aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT) { WARN("Trying to create RTV for depth/stencil format %#x.\n", vkd3d_desc.format->dxgi_format); - return; + goto done; }
if (desc) @@ -3013,16 +3097,26 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev assert(d3d12_resource_is_texture(resource));
if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; + goto done; + + new_rtv_desc.magic = view->type = VKD3D_DESCRIPTOR_MAGIC_RTV; + new_rtv_desc.sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); + new_rtv_desc.format = vkd3d_desc.format; + new_rtv_desc.width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); + new_rtv_desc.height = d3d12_resource_desc_get_height(&resource->desc, vkd3d_desc.miplevel_idx); + new_rtv_desc.layer_count = vkd3d_desc.layer_count; + new_rtv_desc.view = view; + new_rtv_desc.resource = resource; + + done: + prev_view = d3d12_rtv_desc_grab_view(rtv_desc, device);
- rtv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_RTV; - rtv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); - rtv_desc->format = vkd3d_desc.format; - rtv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); - rtv_desc->height = d3d12_resource_desc_get_height(&resource->desc, vkd3d_desc.miplevel_idx); - rtv_desc->layer_count = vkd3d_desc.layer_count; - rtv_desc->view = view; - rtv_desc->resource = resource; + d3d12_rtv_desc_destroy(rtv_desc, device); + *rtv_desc = new_rtv_desc; + + if (prev_view) + if (vkd3d_view_decref(prev_view, device)) + pthread_spin_unlock(&prev_view->lock); }
/* DSVs */ @@ -3032,36 +3126,55 @@ static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_devi return;
vkd3d_view_decref(dsv->view, device); - memset(dsv, 0, sizeof(*dsv)); +} + +static struct vkd3d_view *d3d12_dsv_desc_grab_view(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device) +{ + pthread_spinlock_t *lock; + + while (dsv_desc->view) + { + if (!(pthread_spin_trylock(lock = &dsv_desc->view->lock))) + { + /* we successfully picked up the lock */ + vkd3d_view_incref(dsv_desc->view); + return dsv_desc->view; + } + + /* if we can't acquire it right away, wait for it then try again w/ the new view pointer */ + pthread_spin_lock(lock); + pthread_spin_unlock(lock); + } + + return NULL; }
void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; - - d3d12_dsv_desc_destroy(dsv_desc, device); + struct vkd3d_view *view, *prev_view; + struct d3d12_dsv_desc new_dsv_desc = {};
if (!resource) { FIXME("NULL resource DSV not implemented.\n"); - return; + goto done; }
if (resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { WARN("Cannot create DSV for 3D texture.\n"); - return; + goto done; }
if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) - return; + goto done;
if (!(vkd3d_desc.format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { WARN("Trying to create DSV for format %#x.\n", vkd3d_desc.format->dxgi_format); - return; + goto done; }
if (desc) @@ -3096,16 +3209,26 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev assert(d3d12_resource_is_texture(resource));
if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; + goto done; + + new_dsv_desc.magic = view->type = VKD3D_DESCRIPTOR_MAGIC_DSV; + new_dsv_desc.sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); + new_dsv_desc.format = vkd3d_desc.format; + new_dsv_desc.width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); + new_dsv_desc.height = d3d12_resource_desc_get_height(&resource->desc, vkd3d_desc.miplevel_idx); + new_dsv_desc.layer_count = vkd3d_desc.layer_count; + new_dsv_desc.view = view; + new_dsv_desc.resource = resource; + + done: + prev_view = d3d12_dsv_desc_grab_view(dsv_desc, device); + + d3d12_dsv_desc_destroy(dsv_desc, device); + *dsv_desc = new_dsv_desc;
- dsv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_DSV; - dsv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); - dsv_desc->format = vkd3d_desc.format; - dsv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); - dsv_desc->height = d3d12_resource_desc_get_height(&resource->desc, vkd3d_desc.miplevel_idx); - dsv_desc->layer_count = vkd3d_desc.layer_count; - dsv_desc->view = view; - dsv_desc->resource = resource; + if (prev_view) + if (vkd3d_view_decref(prev_view, device)) + pthread_spin_unlock(&prev_view->lock); }
/* ID3D12DescriptorHeap */ diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index bd9670e..3c4daff 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -451,6 +451,9 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, struct vkd3d_view { LONG refcount; + pthread_spinlock_t lock; + uint32_t type; + VkDescriptorType vk_descriptor_type; union { VkBufferView vk_buffer_view; @@ -460,7 +463,7 @@ struct vkd3d_view VkBufferView vk_counter_view; };
-void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) DECLSPEC_HIDDEN; +int vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) DECLSPEC_HIDDEN; void vkd3d_view_incref(struct vkd3d_view *view) DECLSPEC_HIDDEN;
struct d3d12_desc
*v5, sorry for making this mistake again.
I think it's risky to store the lock in the vkd3d_view, because if the current thread hasn't already inc'd the ref count, the view could be deallocated at any time.
The core problem is that while ref counting is fine for concurrent management of object lifetime, it's not a safe way to acquire a reference to an object to which we have no reference. You could have just incremented the ref count of a deallocated object! So we need locks to protect the acquisition of a reference. I've attached a patch which does this and is sufficient to get the game working. It uses the device mutex so is not optimal, but it's designed to spend the minimum amount of time locked. See if you can make a faster spinlock version.
On 9/19/19 9:31 AM, Conor McCarthy wrote:
I think it's risky to store the lock in the vkd3d_view, because if the current thread hasn't already inc'd the ref count, the view could be deallocated at any time.
The core problem is that while ref counting is fine for concurrent management of object lifetime, it's not a safe way to acquire a reference to an object to which we have no reference. You could have just incremented the ref count of a deallocated object! So we need locks to protect the acquisition of a reference. I've attached a patch which does this and is sufficient to get the game working. It uses the device mutex so is not optimal, but it's designed to spend the minimum amount of time locked. See if you can make a faster spinlock version.
This is tricky. For maximum performance we must only wait for our destination descriptor's view to be replaced and in a valid state again. But, as you mentioned, if we store the lock inside the view, there are multiple steps involved in acquiring it (accessing the view pointer, then incrementing the reference count).
Also, in your patch, I noticed that you intentionally unlock before performing the copy or overwrite. This puzzles me, if one thread waits on the global mutex for access to a descriptor's view to dereference, and we unlock the mutex for them when the view is NULL, they may crash when trying to decrement the reference of the NULL view. If we add a nullcheck to vkd3d_view_decref_descriptor, we'd still have a problem with a resource leak, where a thread overwrites the descriptor without actually freeing the Vulkan object created by the other thread.
Taking this all into account, I think that a good solution would be keeping your global mutex (or making it a spinlock local to accessing the views, maybe even local to the heap?) to acquire the references safely. However, instead of destroying the Vulkan object inside of this lock, maybe we could copy the Vulkan handle, and destroy it after we unlock the broader mutex/spinlock. Assuming we keep the overwriting/copying outside of the lock, we would probably want to call the Vulkan destroy function after the descriptor's view is in a valid state again, to minimize time where the descriptor is invalid and avoid the problem I mentioned in the previous paragraph.
Thoughts?
-Derek
On Fri, Sep 20, 2019 at 1:32 AM Derek Lesho dlesho@codeweavers.com wrote:
Also, in your patch, I noticed that you intentionally unlock before performing the copy or overwrite. This puzzles me, if one thread waits on the global mutex for access to a descriptor's view to dereference, and we unlock the mutex for them when the view is NULL, they may crash when trying to decrement the reference of the NULL view.
Yes, it's missing a null check after locking the mutex. If it's null after locking then the function can return because it means another thread has destroyed the descriptor. I think the only remaining problem is the old one of a single mutex being used for all descriptors. Maybe performance can be improved.
On 9/19/19 11:12 AM, Conor McCarthy wrote:
On Fri, Sep 20, 2019 at 1:32 AM Derek Lesho <dlesho@codeweavers.com mailto:dlesho@codeweavers.com> wrote:
Also, in your patch, I noticed that you intentionally unlock before performing the copy or overwrite. This puzzles me, if one thread waits on the global mutex for access to a descriptor's view to dereference, and we unlock the mutex for them when the view is NULL, they may crash when trying to decrement the reference of the NULL view.
Yes, it's missing a null check after locking the mutex. If it's null after locking then the function can return because it means another thread has destroyed the descriptor. I think the only remaining problem is the old one of a single mutex being used for all descriptors. Maybe performance can be improved.
Even if one thread destroys a descriptor, and then we let both concurrently write their data, we will leak the Vulkan handle written by the thread which writes it to the descriptor first and gets overwritten.
On 9/19/19 12:43 PM, Derek Lesho wrote:
On 9/19/19 11:12 AM, Conor McCarthy wrote:
On Fri, Sep 20, 2019 at 1:32 AM Derek Lesho <dlesho@codeweavers.com mailto:dlesho@codeweavers.com> wrote:
Also, in your patch, I noticed that you intentionally unlock before performing the copy or overwrite. This puzzles me, if one thread waits on the global mutex for access to a descriptor's view to dereference, and we unlock the mutex for them when the view is NULL, they may crash when trying to decrement the reference of the NULL view.
Yes, it's missing a null check after locking the mutex. If it's null after locking then the function can return because it means another thread has destroyed the descriptor. I think the only remaining problem is the old one of a single mutex being used for all descriptors. Maybe performance can be improved.
Even if one thread destroys a descriptor, and then we let both concurrently write their data, we will leak the Vulkan handle written by the thread which writes it to the descriptor first and gets overwritten.
I did some testing with your patch as is, and compared it with a solution where we destroy the vulkan handle after unlocking the global mutex. The difference was within the margin of error.
I also tried adding a dedicated mutex for this purpose, but that didn't have much of an effect on performance either. A dedicated spinlock actually worsened performance, even without the Vulkan call inside the lock.
If we can confirm that no resource leakage is occurring, the patch you sent is probably fine as is.
It turns out that no concurrent writes occur at all. I added mutex locks and tests with WARN messages to the descriptor creation functions and copy, but nothing at all showed up. The only issue is descriptors being written to while being copied. I sent a patch upstream which has couple of issues fixed. It doesn't slow down Metro Exodus, which is nice.
On Fri, Sep 20, 2019 at 7:29 AM Derek Lesho dlesho@codeweavers.com wrote:
On 9/19/19 12:43 PM, Derek Lesho wrote:
On 9/19/19 11:12 AM, Conor McCarthy wrote:
On Fri, Sep 20, 2019 at 1:32 AM Derek Lesho dlesho@codeweavers.com wrote:
Also, in your patch, I noticed that you intentionally unlock before performing the copy or overwrite. This puzzles me, if one thread waits on the global mutex for access to a descriptor's view to dereference, and we unlock the mutex for them when the view is NULL, they may crash when trying to decrement the reference of the NULL view.
Yes, it's missing a null check after locking the mutex. If it's null after locking then the function can return because it means another thread has destroyed the descriptor. I think the only remaining problem is the old one of a single mutex being used for all descriptors. Maybe performance can be improved.
Even if one thread destroys a descriptor, and then we let both concurrently write their data, we will leak the Vulkan handle written by the thread which writes it to the descriptor first and gets overwritten.
I did some testing with your patch as is, and compared it with a solution where we destroy the vulkan handle after unlocking the global mutex. The difference was within the margin of error.
I also tried adding a dedicated mutex for this purpose, but that didn't have much of an effect on performance either. A dedicated spinlock actually worsened performance, even without the Vulkan call inside the lock.
If we can confirm that no resource leakage is occurring, the patch you sent is probably fine as is.