Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 55e6be58..b187c65b 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1247,6 +1247,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, UINT64 initial_value, D3D12_FENCE_FLAGS flags) { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkResult vr; HRESULT hr; int rc; @@ -1266,8 +1267,8 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * if ((rc = vkd3d_cond_init(&fence->null_event_cond))) { ERR("Failed to initialize cond variable, error %d.\n", rc); - vkd3d_mutex_destroy(&fence->mutex); - return hresult_from_errno(rc); + hr = hresult_from_errno(rc); + goto fail_destroy_mutex; }
if (flags) @@ -1282,7 +1283,8 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * &fence->timeline_semaphore)) < 0) { WARN("Failed to create timeline semaphore, vr %d.\n", vr); - return hresult_from_vk_result(vr); + hr = hresult_from_vk_result(vr); + goto fail_destroy_null_cond; } fence->pending_timeline_value = initial_value; fence->gpu_wait_count = 0; @@ -1295,14 +1297,21 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *
if (FAILED(hr = vkd3d_private_store_init(&fence->private_store))) { - vkd3d_mutex_destroy(&fence->mutex); - vkd3d_cond_destroy(&fence->null_event_cond); - return hr; + goto fail_destroy_timeline_semaphore; }
d3d12_device_add_ref(fence->device = device);
return S_OK; + +fail_destroy_timeline_semaphore: + VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL)); +fail_destroy_null_cond: + vkd3d_cond_destroy(&fence->null_event_cond); +fail_destroy_mutex: + vkd3d_mutex_destroy(&fence->mutex); + + return hr; }
HRESULT d3d12_fence_create(struct d3d12_device *device,
Strictly increasing timeline values must be mapped to fence virtual values to avoid invalid use of Vulkan timeline semaphores. In particular, non-increasing values and value jumps of >= 4G are permitted in d3d12.
Different virtual D3D12 command queues may map to the same Vulkan queue. If a wait of value N is submitted on one command queue, and then a signal for >= N is submitted on another, but they are sent to the same Vk queue, the wait will never complete. The solution is to buffer out-of-order waits and any subsequent queue commands until an unblocking signal value is submitted to a different D3D12 queue, or signaled on the CPU.
Buffering out-of-order waits also fixes the old fence implementation so it is fully functional, though a bit less efficient than timeline semaphores.
Based in part on vkd3d-proton patches by Hans-Kristian Arntzen. Unlike the vkd3d-proton implementation, this patch does not use worker threads for submissions to the Vulkan queue.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 894 ++++++++++++++++++++++--------------- libs/vkd3d/device.c | 81 +--- libs/vkd3d/vkd3d_private.h | 78 +++- tests/d3d12.c | 4 +- 4 files changed, 602 insertions(+), 455 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index b187c65b..d0782e5a 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -22,7 +22,11 @@
static void d3d12_fence_incref(struct d3d12_fence *fence); static void d3d12_fence_decref(struct d3d12_fence *fence); -static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence); +static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence, bool on_cpu); +static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); +static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value); +static bool d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any);
HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) @@ -48,9 +52,6 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, object->vk_queue_flags = properties->queueFlags; object->timestamp_bits = properties->timestampValidBits;
- object->wait_completion_semaphore = VK_NULL_HANDLE; - object->pending_wait_completion_value = 0; - object->semaphores = NULL; object->semaphores_size = 0; object->semaphore_count = 0; @@ -66,20 +67,6 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, return S_OK; }
-bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device) -{ - VkResult vr; - - if (!queue->wait_completion_semaphore - && (vr = vkd3d_create_timeline_semaphore(device, 0, &queue->wait_completion_semaphore)) < 0) - { - WARN("Failed to create timeline semaphore, vr %d.\n", vr); - return false; - } - - return true; -} - void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -94,8 +81,6 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
vkd3d_free(queue->semaphores);
- VK_CALL(vkDestroySemaphore(device->vk_device, queue->wait_completion_semaphore, NULL)); - for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i) { if (queue->old_vk_semaphores[i]) @@ -308,9 +293,7 @@ static void vkd3d_wait_for_gpu_timeline_semaphore(struct vkd3d_fence_worker *wor const struct d3d12_device *device = worker->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkSemaphoreWaitInfoKHR wait_info; - uint64_t counter_value; VkResult vr; - HRESULT hr;
wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR; wait_info.pNext = NULL; @@ -328,19 +311,10 @@ static void vkd3d_wait_for_gpu_timeline_semaphore(struct vkd3d_fence_worker *wor return; }
- if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, waiting_fence->u.vk_semaphore, - &counter_value))) < 0) - { - ERR("Failed to get Vulkan semaphore value, vr %d.\n", vr); - } - else - { - TRACE("Signaling fence %p value %#"PRIx64".\n", waiting_fence->fence, waiting_fence->value); - if (FAILED(hr = d3d12_fence_signal(waiting_fence->fence, counter_value, VK_NULL_HANDLE))) - ERR("Failed to signal D3D12 fence, hr %#x.\n", hr); + TRACE("Signaling fence %p value %#"PRIx64".\n", waiting_fence->fence, waiting_fence->value); + d3d12_fence_signal_timeline_semaphore(waiting_fence->fence, waiting_fence->value);
- d3d12_fence_decref(waiting_fence->fence); - } + d3d12_fence_decref(waiting_fence->fence); }
static void vkd3d_wait_for_gpu_fence(struct vkd3d_fence_worker *worker, @@ -361,7 +335,7 @@ static void vkd3d_wait_for_gpu_fence(struct vkd3d_fence_worker *worker, }
TRACE("Signaling fence %p value %#"PRIx64".\n", waiting_fence->fence, waiting_fence->value); - if (FAILED(hr = d3d12_fence_signal(waiting_fence->fence, waiting_fence->value, waiting_fence->u.vk_fence))) + if (FAILED(hr = d3d12_fence_signal(waiting_fence->fence, waiting_fence->value, waiting_fence->u.vk_fence, false))) ERR("Failed to signal D3D12 fence, hr %#x.\n", hr);
d3d12_fence_decref(waiting_fence->fence); @@ -434,7 +408,7 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, worker->fences = NULL; worker->fences_size = 0;
- worker->wait_for_gpu_fence = device->use_timeline_semaphores + worker->wait_for_gpu_fence = device->vk_info.KHR_timeline_semaphore ? vkd3d_wait_for_gpu_timeline_semaphore : vkd3d_wait_for_gpu_fence;
if ((rc = vkd3d_mutex_init(&worker->mutex))) @@ -606,17 +580,17 @@ static void d3d12_fence_garbage_collect_vk_semaphores_locked(struct d3d12_fence current = &fence->semaphores[i]; /* The semaphore doesn't have a pending signal operation if the fence * was signaled. */ - if ((current->vk_fence || current->is_acquired) && !destroy_all) + if ((current->u.binary.vk_fence || current->u.binary.is_acquired) && !destroy_all) { ++i; continue; }
- if (current->vk_fence) + if (current->u.binary.vk_fence) WARN("Destroying potentially pending semaphore.\n"); - assert(!current->is_acquired); + assert(!current->u.binary.is_acquired);
- VK_CALL(vkDestroySemaphore(device->vk_device, current->vk_semaphore, NULL)); + VK_CALL(vkDestroySemaphore(device->vk_device, current->u.binary.vk_semaphore, NULL)); fence->semaphores[i] = fence->semaphores[--fence->semaphore_count]; }
@@ -652,23 +626,16 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) vkd3d_mutex_unlock(&fence->mutex); }
-static struct vkd3d_signaled_semaphore *d3d12_fence_acquire_vk_semaphore(struct d3d12_fence *fence, +static struct vkd3d_signaled_semaphore *d3d12_fence_acquire_vk_semaphore_locked(struct d3d12_fence *fence, uint64_t value, uint64_t *completed_value) { struct vkd3d_signaled_semaphore *semaphore; struct vkd3d_signaled_semaphore *current; uint64_t semaphore_value; unsigned int i; - int rc;
TRACE("fence %p, value %#"PRIx64".\n", fence, value);
- if ((rc = vkd3d_mutex_lock(&fence->mutex))) - { - ERR("Failed to lock mutex, error %d.\n", rc); - return VK_NULL_HANDLE; - } - semaphore = NULL; semaphore_value = ~(uint64_t)0;
@@ -676,7 +643,7 @@ static struct vkd3d_signaled_semaphore *d3d12_fence_acquire_vk_semaphore(struct { current = &fence->semaphores[i]; /* Prefer a semaphore with the smallest value. */ - if (!current->is_acquired && current->value >= value && semaphore_value >= current->value) + if (!current->u.binary.is_acquired && current->value >= value && semaphore_value >= current->value) { semaphore = current; semaphore_value = current->value; @@ -686,12 +653,10 @@ static struct vkd3d_signaled_semaphore *d3d12_fence_acquire_vk_semaphore(struct }
if (semaphore) - semaphore->is_acquired = true; + semaphore->u.binary.is_acquired = true;
*completed_value = fence->value;
- vkd3d_mutex_unlock(&fence->mutex); - return semaphore; }
@@ -705,7 +670,7 @@ static void d3d12_fence_remove_vk_semaphore(struct d3d12_fence *fence, struct vk return; }
- assert(semaphore->is_acquired); + assert(semaphore->u.binary.is_acquired);
*semaphore = fence->semaphores[--fence->semaphore_count];
@@ -722,32 +687,133 @@ static void d3d12_fence_release_vk_semaphore(struct d3d12_fence *fence, struct v return; }
- assert(semaphore->is_acquired); - semaphore->is_acquired = false; + assert(semaphore->u.binary.is_acquired); + semaphore->u.binary.is_acquired = false;
vkd3d_mutex_unlock(&fence->mutex); }
-static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence, - VkSemaphore vk_semaphore, VkFence vk_fence, uint64_t value) +static void d3d12_fence_update_pending_value_locked(struct d3d12_fence *fence) +{ + uint64_t new_max_pending_value; + unsigned int i; + + for (i = 0, new_max_pending_value = 0; i < fence->semaphore_count; ++i) + new_max_pending_value = max(fence->semaphores[i].value, new_max_pending_value); + + fence->max_pending_value = max(fence->value, new_max_pending_value); +} + +static HRESULT d3d12_fence_update_pending_value(struct d3d12_fence *fence) +{ + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + d3d12_fence_update_pending_value_locked(fence); + + vkd3d_mutex_unlock(&fence->mutex); + + return S_OK; +} + +static HRESULT d3d12_device_add_blocked_command_queues(struct d3d12_device *device, + struct d3d12_command_queue * const *command_queues, unsigned int count) { - struct vkd3d_signaled_semaphore *semaphore; HRESULT hr = S_OK; + unsigned int i; int rc;
- TRACE("fence %p, value %#"PRIx64".\n", fence, value); + if ((rc = vkd3d_mutex_lock(&device->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + }
- if (!(semaphore = vkd3d_malloc(sizeof(*semaphore)))) + if ((i = ARRAY_SIZE(device->blocked_queues) - device->blocked_queue_count) < count) { - ERR("Failed to add semaphore.\n"); - return E_OUTOFMEMORY; + FIXME("Failed to add %u blocked command queue(s) to device %p.\n", count - i, device); + count = i; + hr = E_FAIL; + } + + for (i = 0; i < count; ++i) + device->blocked_queues[device->blocked_queue_count++] = command_queues[i]; + + vkd3d_mutex_unlock(&device->mutex); + return hr; +} + +static HRESULT d3d12_device_flush_blocked_queues_once(struct d3d12_device *device, bool *flushed_any) +{ + struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES]; + unsigned int i, blocked_queue_count; + int rc; + + *flushed_any = false; + + if ((rc = vkd3d_mutex_lock(&device->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + /* Flush any ops unblocked by a new pending value. These cannot be flushed + * with the device locked, so move the queue pointers to a local array. */ + blocked_queue_count = device->blocked_queue_count; + memcpy(blocked_queues, device->blocked_queues, blocked_queue_count * sizeof(blocked_queues[0])); + device->blocked_queue_count = 0; + + vkd3d_mutex_unlock(&device->mutex); + + i = 0; + while (i < blocked_queue_count) + { + if (d3d12_command_queue_flush_ops(blocked_queues[i], flushed_any)) + blocked_queues[i] = blocked_queues[--blocked_queue_count]; + else + ++i; + } + + /* None of these queues could have been re-added during the above loop because + * blocked queues always have a nonzero op count. */ + return d3d12_device_add_blocked_command_queues(device, blocked_queues, blocked_queue_count); +} + +static HRESULT d3d12_device_flush_blocked_queues(struct d3d12_device *device) +{ + bool flushed_any; + HRESULT hr; + + /* Executing an op on one queue may unblock another, so repeat until nothing is flushed. 
*/ + do + { + if (!device->blocked_queue_count) + return S_OK; + if (FAILED(hr = d3d12_device_flush_blocked_queues_once(device, &flushed_any))) + return hr; } + while (flushed_any); + + return S_OK; +} + +static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence, VkSemaphore vk_semaphore, + VkFence vk_fence, uint64_t value, const struct vkd3d_queue *signalling_queue) +{ + struct vkd3d_signaled_semaphore *semaphore; + int rc; + + TRACE("fence %p, value %#"PRIx64".\n", fence, value);
if ((rc = vkd3d_mutex_lock(&fence->mutex))) { ERR("Failed to lock mutex, error %d.\n", rc); - vkd3d_free(semaphore); - return E_FAIL; + return hresult_from_errno(rc); }
d3d12_fence_garbage_collect_vk_semaphores_locked(fence, false); @@ -757,21 +823,24 @@ static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence, { ERR("Failed to add semaphore.\n"); vkd3d_mutex_unlock(&fence->mutex); - return false; + return E_OUTOFMEMORY; }
semaphore = &fence->semaphores[fence->semaphore_count++]; semaphore->value = value; - semaphore->vk_semaphore = vk_semaphore; - semaphore->vk_fence = vk_fence; - semaphore->is_acquired = false; + semaphore->u.binary.vk_semaphore = vk_semaphore; + semaphore->u.binary.vk_fence = vk_fence; + semaphore->u.binary.is_acquired = false; + semaphore->signalling_queue = signalling_queue; + + d3d12_fence_update_pending_value_locked(fence);
vkd3d_mutex_unlock(&fence->mutex);
- return hr; + return d3d12_device_flush_blocked_queues(fence->device); }
-static bool d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence) +static void d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence) { struct d3d12_device *device = fence->device; bool signal_null_event_cond = false; @@ -803,10 +872,11 @@ static bool d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence)
fence->event_count = j;
- return signal_null_event_cond; + if (signal_null_event_cond) + vkd3d_cond_broadcast(&fence->null_event_cond); }
-static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence) +static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence, bool on_cpu) { struct d3d12_device *device = fence->device; struct vkd3d_signaled_semaphore *current; @@ -821,8 +891,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF
fence->value = value;
- if (d3d12_fence_signal_external_events_locked(fence)) - vkd3d_cond_broadcast(&fence->null_event_cond); + d3d12_fence_signal_external_events_locked(fence);
if (vk_fence) { @@ -831,8 +900,8 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF for (i = 0; i < fence->semaphore_count; ++i) { current = &fence->semaphores[i]; - if (current->vk_fence == vk_fence) - current->vk_fence = VK_NULL_HANDLE; + if (current->u.binary.vk_fence == vk_fence) + current->u.binary.vk_fence = VK_NULL_HANDLE; }
for (i = 0; i < ARRAY_SIZE(fence->old_vk_fences); ++i) @@ -849,9 +918,101 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL)); }
+ d3d12_fence_update_pending_value_locked(fence); + vkd3d_mutex_unlock(&fence->mutex);
- return S_OK; + return on_cpu ? d3d12_device_flush_blocked_queues(device) : S_OK; +} + +static uint64_t d3d12_fence_add_pending_timeline_signal(struct d3d12_fence *fence, uint64_t virtual_value, + const struct vkd3d_queue *signalling_queue) +{ + struct vkd3d_signaled_semaphore *semaphore; + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (!vkd3d_array_reserve((void **)&fence->semaphores, &fence->semaphores_size, + fence->semaphore_count + 1, sizeof(*fence->semaphores))) + { + return 0; + } + + semaphore = &fence->semaphores[fence->semaphore_count++]; + semaphore->value = virtual_value; + semaphore->u.timeline_value = ++fence->pending_timeline_value; + semaphore->signalling_queue = signalling_queue; + + vkd3d_mutex_unlock(&fence->mutex); + + return fence->pending_timeline_value; +} + +static uint64_t d3d12_fence_get_timeline_wait_value_locked(struct d3d12_fence *fence, uint64_t virtual_value) +{ + uint64_t target_timeline_value = UINT64_MAX; + unsigned int i; + + /* Find the smallest physical value which is at least the virtual value. */ + for (i = 0; i < fence->semaphore_count; ++i) + { + if (virtual_value <= fence->semaphores[i].value) + target_timeline_value = min(target_timeline_value, fence->semaphores[i].u.timeline_value); + } + + /* No timeline value will be found if it was already signaled on the GPU and handled in + * the worker thread. A wait must still be emitted as a barrier against command re-ordering. */ + return (target_timeline_value == UINT64_MAX) ? 0 : target_timeline_value; +} + +static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value) +{ + bool did_signal; + unsigned int i; + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + /* With multiple fence workers, it is possible that signal calls are out of + * order. 
The physical value itself is monotonic, but we need to make sure + * that all signals happen in correct order if there are fence rewinds. + * We don't expect the loop to run more than once, but there might be + * extreme edge cases where we signal 2 or more. */ + while (fence->timeline_value < timeline_value) + { + ++fence->timeline_value; + did_signal = false; + + for (i = 0; i < fence->semaphore_count; ++i) + { + if (fence->timeline_value == fence->semaphores[i].u.timeline_value) + { + fence->value = fence->semaphores[i].value; + d3d12_fence_signal_external_events_locked(fence); + fence->semaphores[i] = fence->semaphores[--fence->semaphore_count]; + did_signal = true; + break; + } + } + + if (!did_signal) + FIXME("Did not signal a virtual value.\n"); + } + + /* If a rewind remains queued, the virtual value deleted above may be + * greater than any pending value, so update the max pending value. */ + d3d12_fence_update_pending_value_locked(fence); + + vkd3d_mutex_unlock(&fence->mutex); }
static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, @@ -1060,100 +1221,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i return S_OK; }
-static inline bool d3d12_fence_gpu_wait_is_completed(const struct d3d12_fence *fence, unsigned int i) -{ - const struct d3d12_device *device = fence->device; - const struct vkd3d_vk_device_procs *vk_procs; - uint64_t value; - VkResult vr; - - vk_procs = &device->vk_procs; - - if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, - fence->gpu_waits[i].queue->wait_completion_semaphore, &value))) >= 0) - { - return value >= fence->gpu_waits[i].pending_value; - } - - ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr); - return true; -} - -static inline bool d3d12_fence_has_pending_gpu_ops_locked(struct d3d12_fence *fence) -{ - const struct d3d12_device *device = fence->device; - const struct vkd3d_vk_device_procs *vk_procs; - uint64_t value; - unsigned int i; - VkResult vr; - - for (i = 0; i < fence->gpu_wait_count; ++i) - { - if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count) - fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count]; - } - if (fence->gpu_wait_count) - return true; - - /* Check for pending signals too. */ - if (fence->value >= fence->pending_timeline_value) - return false; - - vk_procs = &device->vk_procs; - - /* Check the actual semaphore value in case fence->value update is lagging. */ - if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, fence->timeline_semaphore, &value))) < 0) - { - ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr); - return false; - } - - return value < fence->pending_timeline_value; -} - -/* Replace the VkSemaphore with a new one to allow a lower value to be set. Ideally apps will - * only use this to reset the fence when no operations are pending on the queue. 
*/ -static HRESULT d3d12_fence_reinit_timeline_semaphore_locked(struct d3d12_fence *fence, uint64_t value) -{ - const struct d3d12_device *device = fence->device; - const struct vkd3d_vk_device_procs *vk_procs; - VkSemaphore timeline_semaphore; - VkResult vr; - - if (d3d12_fence_has_pending_gpu_ops_locked(fence)) - { - /* This situation is not very likely because it means a fence with pending waits and/or signals was - * signalled on the CPU to a lower value. For now, emit a fixme so it can be patched if necessary. - * A patch already exists for this but it's not pretty. */ - FIXME("Unable to re-initialise timeline semaphore to a lower value due to pending GPU ops.\n"); - return E_FAIL; - } - - if ((vr = vkd3d_create_timeline_semaphore(device, value, &timeline_semaphore)) < 0) - { - WARN("Failed to create timeline semaphore, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } - - fence->value = value; - fence->pending_timeline_value = value; - - WARN("Replacing timeline semaphore with a new object.\n"); - - vk_procs = &device->vk_procs; - - VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL)); - fence->timeline_semaphore = timeline_semaphore; - - return S_OK; -} - static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value) { - const struct d3d12_device *device = fence->device; - VkSemaphoreSignalInfoKHR info; - HRESULT hr = S_OK; - VkResult vr; int rc;
if ((rc = vkd3d_mutex_lock(&fence->mutex))) @@ -1162,48 +1231,13 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen return hresult_from_errno(rc); }
- /* We must only signal a value which is greater than the current value. - * That value can be in the range of current known value (fence->value), or as large as pending_timeline_value. - * Pending timeline value signal might be blocked by another synchronization primitive, and thus statically - * cannot be that value, so the safest thing to do is to check the current value which is updated by the fence - * wait thread continuously. This check is technically racy since the value might be immediately out of date, - * but there is no way to avoid this. */ - if (value > fence->value) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - /* Sanity check against the delta limit. */ - if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference) - { - FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n", - value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference); - } - - info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR; - info.pNext = NULL; - info.semaphore = fence->timeline_semaphore; - info.value = value; - if ((vr = VK_CALL(vkSignalSemaphoreKHR(device->vk_device, &info))) >= 0) - { - fence->value = value; - if (value > fence->pending_timeline_value) - fence->pending_timeline_value = value; - } - else - { - ERR("Failed to signal timeline semaphore, vr %d.\n", vr); - hr = hresult_from_vk_result(vr); - } - } - else if (value < fence->value) - { - hr = d3d12_fence_reinit_timeline_semaphore_locked(fence, value); - } - + fence->value = value; d3d12_fence_signal_external_events_locked(fence); + d3d12_fence_update_pending_value_locked(fence);
vkd3d_mutex_unlock(&fence->mutex); - return hr; + + return d3d12_device_flush_blocked_queues(fence->device); }
static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) @@ -1214,7 +1248,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 v
if (fence->timeline_semaphore) return d3d12_fence_signal_cpu_timeline_semaphore(fence, value); - return d3d12_fence_signal(fence, value, VK_NULL_HANDLE); + return d3d12_fence_signal(fence, value, VK_NULL_HANDLE, true); }
static const struct ID3D12FenceVtbl d3d12_fence_vtbl = @@ -1257,6 +1291,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * fence->refcount = 1;
fence->value = initial_value; + fence->max_pending_value = initial_value;
if ((rc = vkd3d_mutex_init(&fence->mutex))) { @@ -1279,15 +1314,15 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * fence->event_count = 0;
fence->timeline_semaphore = VK_NULL_HANDLE; - if (device->use_timeline_semaphores && (vr = vkd3d_create_timeline_semaphore(device, initial_value, + fence->timeline_value = 0; + fence->pending_timeline_value = 0; + if (device->vk_info.KHR_timeline_semaphore && (vr = vkd3d_create_timeline_semaphore(device, 0, &fence->timeline_semaphore)) < 0) { WARN("Failed to create timeline semaphore, vr %d.\n", vr); hr = hresult_from_vk_result(vr); goto fail_destroy_null_cond; } - fence->pending_timeline_value = initial_value; - fence->gpu_wait_count = 0;
fence->semaphores = NULL; fence->semaphores_size = 0; @@ -6136,6 +6171,9 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *i
vkd3d_fence_worker_stop(&command_queue->fence_worker, device);
+ vkd3d_mutex_destroy(&command_queue->op_mutex); + vkd3d_free(command_queue->ops); + vkd3d_private_store_destroy(&command_queue->private_store);
vkd3d_free(command_queue); @@ -6205,6 +6243,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetDevice(ID3D12CommandQueu return d3d12_device_query_interface(command_queue->device, iid, device); }
+static struct vkd3d_cs_op_data *d3d12_command_queue_require_space_locked(struct d3d12_command_queue *queue) +{ + if (!vkd3d_array_reserve((void **)&queue->ops, &queue->ops_size, queue->ops_count + 1, sizeof(*queue->ops))) + return NULL; + + return &queue->ops[queue->ops_count++]; +} + static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *resource, UINT region_count, const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, @@ -6236,22 +6282,50 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command src_region_start_coordinate, region_size, flags); }
+static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, + VkCommandBuffer *buffers, unsigned int count) +{ + const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; + struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue; + VkSubmitInfo submit_desc; + VkQueue vk_queue; + VkResult vr; + + memset(&submit_desc, 0, sizeof(submit_desc)); + + if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) + { + ERR("Failed to acquire queue %p.\n", vkd3d_queue); + return; + } + + submit_desc.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_desc.commandBufferCount = count; + submit_desc.pCommandBuffers = buffers; + + if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_desc, VK_NULL_HANDLE))) < 0) + ERR("Failed to submit queue(s), vr %d.\n", vr); + + vkd3d_queue_release(vkd3d_queue); + + vkd3d_free(buffers); +} + static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); - const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_command_list *cmd_list; - struct VkSubmitInfo submit_desc; + struct vkd3d_cs_op_data *op; VkCommandBuffer *buffers; - VkQueue vk_queue; unsigned int i; - VkResult vr; + int rc;
TRACE("iface %p, command_list_count %u, command_lists %p.\n", iface, command_list_count, command_lists);
- vk_procs = &command_queue->device->vk_procs; + if (!command_list_count) + return;
if (!(buffers = vkd3d_calloc(command_list_count, sizeof(*buffers)))) { @@ -6274,29 +6348,30 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm buffers[i] = cmd_list->vk_command_buffer; }
- submit_desc.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_desc.pNext = NULL; - submit_desc.waitSemaphoreCount = 0; - submit_desc.pWaitSemaphores = NULL; - submit_desc.pWaitDstStageMask = NULL; - submit_desc.commandBufferCount = command_list_count; - submit_desc.pCommandBuffers = buffers; - submit_desc.signalSemaphoreCount = 0; - submit_desc.pSignalSemaphores = NULL; - - if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue))) + if ((rc = vkd3d_mutex_lock(&command_queue->op_mutex))) { - ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue); - vkd3d_free(buffers); + ERR("Failed to lock mutex, error %d.\n", rc); return; }
- if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_desc, VK_NULL_HANDLE))) < 0) - ERR("Failed to submit queue(s), vr %d.\n", vr); + if (!command_queue->ops_count) + { + d3d12_command_queue_execute(command_queue, buffers, command_list_count); + vkd3d_mutex_unlock(&command_queue->op_mutex); + return; + }
- vkd3d_queue_release(command_queue->vkd3d_queue); + if (!(op = d3d12_command_queue_require_space_locked(command_queue))) + { + ERR("Failed to add op.\n"); + return; + } + op->opcode = VKD3D_CS_OP_EXECUTE; + op->u.execute.buffers = buffers; + op->u.execute.buffer_count = command_list_count;
- vkd3d_free(buffers); + vkd3d_mutex_unlock(&command_queue->op_mutex); + return; }
static void STDMETHODCALLTYPE d3d12_command_queue_SetMarker(ID3D12CommandQueue *iface, @@ -6318,38 +6393,6 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i FIXME("iface %p stub!\n", iface); }
-static HRESULT d3d12_fence_update_gpu_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t value) -{ - const struct d3d12_device *device = fence->device; - int rc; - - if ((rc = vkd3d_mutex_lock(&fence->mutex))) - { - ERR("Failed to lock mutex, error %d.\n", rc); - return hresult_from_errno(rc); - } - - /* If we're attempting to async signal a fence with a value which is not strictly increasing the payload value, - * warn about this case. Do not treat this as an error since it works at least with RADV and Nvidia drivers and - * there's no workaround on the GPU side. */ - if (value <= fence->pending_timeline_value) - { - WARN("Fence %p values are not strictly increasing. Pending values: old %"PRIu64", new %"PRIu64".\n", - fence, fence->pending_timeline_value, value); - } - /* Sanity check against the delta limit. Use the current fence value. */ - else if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference) - { - FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n", - value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference); - } - fence->pending_timeline_value = value; - - vkd3d_mutex_unlock(&fence->mutex); - - return S_OK; -} - static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, VkSemaphore vk_semaphore, struct d3d12_fence *fence, uint64_t value, struct vkd3d_queue *queue) { @@ -6389,31 +6432,68 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * ID3D12Fence *fence_iface, UINT64 value) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); + struct vkd3d_cs_op_data *op; + HRESULT hr = S_OK; + int rc; + + TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); + + if ((rc = vkd3d_mutex_lock(&command_queue->op_mutex))) + { + 
ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (!command_queue->ops_count) + { + hr = d3d12_command_queue_signal(command_queue, fence, value); + goto done; + } + + if (!(op = d3d12_command_queue_require_space_locked(command_queue))) + { + hr = E_OUTOFMEMORY; + goto done; + } + op->opcode = VKD3D_CS_OP_SIGNAL; + op->u.signal.fence = fence; + op->u.signal.value = value; + + d3d12_fence_incref(fence); + +done: + vkd3d_mutex_unlock(&command_queue->op_mutex); + return hr; +} + +static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value) +{ VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; const struct vkd3d_vk_device_procs *vk_procs; VkSemaphore vk_semaphore = VK_NULL_HANDLE; VkFence vk_fence = VK_NULL_HANDLE; struct vkd3d_queue *vkd3d_queue; uint64_t sequence_number = 0; + uint64_t timeline_value = 0; struct d3d12_device *device; - struct d3d12_fence *fence; VkSubmitInfo submit_info; VkQueue vk_queue; VkResult vr; HRESULT hr;
- TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); - device = command_queue->device; vk_procs = &device->vk_procs; vkd3d_queue = command_queue->vkd3d_queue;
- fence = unsafe_impl_from_ID3D12Fence(fence_iface); - - if (device->use_timeline_semaphores) + if (device->vk_info.KHR_timeline_semaphore) { - if (FAILED(hr = d3d12_fence_update_gpu_signal_timeline_semaphore(fence, value))) - return hr; + if (!(timeline_value = d3d12_fence_add_pending_timeline_signal(fence, value, vkd3d_queue))) + { + ERR("Failed to add pending signal.\n"); + return E_OUTOFMEMORY; + }
vk_semaphore = fence->timeline_semaphore; assert(vk_semaphore); @@ -6434,7 +6514,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * goto fail; }
- if (!device->use_timeline_semaphores && (vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, + if (!device->vk_info.KHR_timeline_semaphore && (vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, device, &vk_semaphore)) < 0) { ERR("Failed to create Vulkan semaphore, vr %d.\n", vr); @@ -6451,11 +6531,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0; submit_info.pSignalSemaphores = &vk_semaphore;
- if (device->use_timeline_semaphores) + if (device->vk_info.KHR_timeline_semaphore) { timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; timeline_submit_info.pNext = NULL; - timeline_submit_info.pSignalSemaphoreValues = &value; + timeline_submit_info.pSignalSemaphoreValues = &timeline_value; timeline_submit_info.signalSemaphoreValueCount = submit_info.signalSemaphoreCount; timeline_submit_info.waitSemaphoreValueCount = 0; timeline_submit_info.pWaitSemaphoreValues = NULL; @@ -6463,7 +6543,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * }
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence)); - if (!device->use_timeline_semaphores && vr >= 0) + if (!device->vk_info.KHR_timeline_semaphore && vr >= 0) { sequence_number = ++vkd3d_queue->submitted_sequence_number;
@@ -6480,13 +6560,22 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * goto fail_vkresult; }
- if (device->use_timeline_semaphores) + if (device->vk_info.KHR_timeline_semaphore) { + if (FAILED(hr = d3d12_fence_update_pending_value(fence))) + return hr; + + if (FAILED(hr = d3d12_device_flush_blocked_queues(device))) + return hr; + + vk_semaphore = fence->timeline_semaphore; + assert(vk_semaphore); + return vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, - vk_semaphore, fence, value, vkd3d_queue); + vk_semaphore, fence, timeline_value, vkd3d_queue); }
- if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value))) + if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value, vkd3d_queue))) vk_semaphore = VK_NULL_HANDLE;
vr = VK_CALL(vkGetFenceStatus(device->vk_device, vk_fence)); @@ -6501,7 +6590,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * else if (vr == VK_SUCCESS) { TRACE("Already signaled %p, value %#"PRIx64".\n", fence, value); - hr = d3d12_fence_signal(fence, value, vk_fence); + hr = d3d12_fence_signal(fence, value, vk_fence, false); vk_fence = VK_NULL_HANDLE; vkd3d_queue_update_sequence_number(vkd3d_queue, sequence_number, device); } @@ -6524,12 +6613,12 @@ fail_vkresult: hr = hresult_from_vk_result(vr); fail: VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL)); - if (!device->use_timeline_semaphores) + if (!device->vk_info.KHR_timeline_semaphore) VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL)); return hr; }
-static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_queue *command_queue, +static HRESULT d3d12_command_queue_wait_binary_semaphore_locked(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, uint64_t value) { static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; @@ -6545,7 +6634,10 @@ static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_qu vk_procs = &command_queue->device->vk_procs; queue = command_queue->vkd3d_queue;
- semaphore = d3d12_fence_acquire_vk_semaphore(fence, value, &completed_value); + semaphore = d3d12_fence_acquire_vk_semaphore_locked(fence, value, &completed_value); + + vkd3d_mutex_unlock(&fence->mutex); + if (!semaphore && completed_value >= value) { /* We don't get a Vulkan semaphore if the fence was signaled on CPU. */ @@ -6568,7 +6660,7 @@ static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_qu } else { - FIXME("Failed to acquire Vulkan semaphore for fence %p, value %#"PRIx64 + WARN("Failed to acquire Vulkan semaphore for fence %p, value %#"PRIx64 ", completed value %#"PRIx64".\n", fence, value, completed_value); }
@@ -6579,7 +6671,7 @@ static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_qu submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.pNext = NULL; submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = &semaphore->vk_semaphore; + submit_info.pWaitSemaphores = &semaphore->u.binary.vk_semaphore; submit_info.pWaitDstStageMask = &wait_stage_mask; submit_info.commandBufferCount = 0; submit_info.pCommandBuffers = NULL; @@ -6597,7 +6689,7 @@ static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_qu
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE))) >= 0) { - queue->semaphores[queue->semaphore_count].vk_semaphore = semaphore->vk_semaphore; + queue->semaphores[queue->semaphore_count].vk_semaphore = semaphore->u.binary.vk_semaphore; queue->semaphores[queue->semaphore_count].sequence_number = queue->submitted_sequence_number + 1; ++queue->semaphore_count;
@@ -6622,48 +6714,7 @@ fail: return hr; }
-static inline void d3d12_fence_update_gpu_wait(struct d3d12_fence *fence, const struct vkd3d_queue *queue) -{ - unsigned int i; - bool found; - int rc; - - if ((rc = vkd3d_mutex_lock(&fence->mutex))) - { - ERR("Failed to lock mutex, error %d.\n", rc); - return; - } - - for (i = 0, found = false; i < fence->gpu_wait_count; ++i) - { - if (fence->gpu_waits[i].queue == queue) - { - fence->gpu_waits[i].pending_value = queue->pending_wait_completion_value; - found = true; - } - else if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count) - { - fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count]; - } - } - - if (!found) - { - if (fence->gpu_wait_count < ARRAY_SIZE(fence->gpu_waits)) - { - fence->gpu_waits[fence->gpu_wait_count].queue = queue; - fence->gpu_waits[fence->gpu_wait_count++].pending_value = queue->pending_wait_completion_value; - } - else - { - FIXME("Unable to track GPU fence wait.\n"); - } - } - - vkd3d_mutex_unlock(&fence->mutex); -} - -static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_queue *command_queue, +static HRESULT d3d12_command_queue_wait_locked(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, uint64_t value) { static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; @@ -6671,25 +6722,29 @@ static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_ const struct vkd3d_vk_device_procs *vk_procs; struct vkd3d_queue *queue; VkSubmitInfo submit_info; + uint64_t wait_value; VkQueue vk_queue; VkResult vr;
vk_procs = &command_queue->device->vk_procs; queue = command_queue->vkd3d_queue;
+ if (!command_queue->device->vk_info.KHR_timeline_semaphore) + return d3d12_command_queue_wait_binary_semaphore_locked(command_queue, fence, value); + + wait_value = d3d12_fence_get_timeline_wait_value_locked(fence, value); + + /* We can unlock the fence here. The queue semaphore will not be signalled to signal_value + * until we have submitted, so the semaphore cannot be destroyed before the call to vkQueueSubmit. */ + vkd3d_mutex_unlock(&fence->mutex); + assert(fence->timeline_semaphore); timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; timeline_submit_info.pNext = NULL; + timeline_submit_info.waitSemaphoreValueCount = 1; + timeline_submit_info.pWaitSemaphoreValues = &wait_value; timeline_submit_info.signalSemaphoreValueCount = 0; timeline_submit_info.pSignalSemaphoreValues = NULL; - timeline_submit_info.waitSemaphoreValueCount = 1; - timeline_submit_info.pWaitSemaphoreValues = &value; - - if (!(vk_queue = vkd3d_queue_acquire(queue))) - { - ERR("Failed to acquire queue %p.\n", queue); - return E_FAIL; - }
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.pNext = &timeline_submit_info; @@ -6701,14 +6756,11 @@ static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_ submit_info.signalSemaphoreCount = 0; submit_info.pSignalSemaphores = NULL;
- ++queue->pending_wait_completion_value; - - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &queue->wait_completion_semaphore; - timeline_submit_info.signalSemaphoreValueCount = 1; - timeline_submit_info.pSignalSemaphoreValues = &queue->pending_wait_completion_value; - - d3d12_fence_update_gpu_wait(fence, queue); + if (!(vk_queue = vkd3d_queue_acquire(queue))) + { + ERR("Failed to acquire queue %p.\n", queue); + return E_FAIL; + }
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
@@ -6728,14 +6780,58 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); + struct vkd3d_cs_op_data *op; + HRESULT hr = S_OK; + int rc;
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
- if (command_queue->device->use_timeline_semaphores) - return d3d12_command_queue_wait_timeline_semaphore(command_queue, fence, value); + if ((rc = vkd3d_mutex_lock(&command_queue->op_mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + hr = hresult_from_errno(rc); + goto done; + }
- FIXME_ONCE("KHR_timeline_semaphore is not available or incompatible. Some wait commands may be unsupported.\n"); - return d3d12_command_queue_wait_binary_semaphore(command_queue, fence, value); + if (!command_queue->ops_count && value <= fence->max_pending_value) + { + hr = d3d12_command_queue_wait_locked(command_queue, fence, value); + goto done; + } + + vkd3d_mutex_unlock(&fence->mutex); + + /* This is the critical part required to support out-of-order signal. + * Normally we would be able to submit waits and signals out of order, but + * we don't have virtualized queues in Vulkan, so we need to handle the case + * where multiple queues alias over the same physical queue, so effectively, + * we need to manage out-of-order submits ourselves. */ + + if (!command_queue->ops_count) + hr = d3d12_device_add_blocked_command_queues(command_queue->device, &command_queue, 1); + + if (FAILED(hr)) + goto done; + + if (!(op = d3d12_command_queue_require_space_locked(command_queue))) + { + hr = E_OUTOFMEMORY; + goto done; + } + op->opcode = VKD3D_CS_OP_WAIT; + op->u.wait.fence = fence; + op->u.wait.value = value; + + d3d12_fence_incref(fence); + +done: + vkd3d_mutex_unlock(&command_queue->op_mutex); + return hr; }
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface, @@ -6859,10 +6955,82 @@ static const struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = d3d12_command_queue_GetDesc, };
+/* flushed_any is initialised by the caller. */ +static bool d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any) +{ + struct vkd3d_cs_op_data *op; + struct d3d12_fence *fence; + bool flushed_all = false; + unsigned int i; + int rc; + + if (!queue->ops_count) + return true; + + /* This function may be re-entered during a call below to d3d12_command_queue_signal(). + * We return true because the first caller is responsible for re-adding this queue to + * the flush list if it ends up returning false. */ + if (queue->is_flushing) + return true; + + if ((rc = vkd3d_mutex_lock(&queue->op_mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return true; + } + + /* Currently only required for d3d12_command_queue_signal(), but set it here anyway. */ + queue->is_flushing = true; + + for (i = 0; i < queue->ops_count; ++i) + { + op = &queue->ops[i]; + switch (op->opcode) + { + case VKD3D_CS_OP_WAIT: + fence = op->u.wait.fence; + if (op->u.wait.value > fence->max_pending_value) + { + queue->ops_count -= i; + memmove(queue->ops, op, queue->ops_count * sizeof(*op)); + goto done; + } + vkd3d_mutex_lock(&fence->mutex); + d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); + d3d12_fence_decref(fence); + break; + + case VKD3D_CS_OP_SIGNAL: + d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); + d3d12_fence_decref(op->u.signal.fence); + break; + + case VKD3D_CS_OP_EXECUTE: + d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); + break; + + default: + FIXME("Unhandled op type %u.\n", op->opcode); + break; + } + *flushed_any |= true; + } + + queue->ops_count = 0; + flushed_all = true; + +done: + queue->is_flushing = false; + + vkd3d_mutex_unlock(&queue->op_mutex); + return flushed_all; +} + static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, struct d3d12_device *device, const D3D12_COMMAND_QUEUE_DESC *desc) { HRESULT hr; + int rc;
queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl; queue->refcount = 1; @@ -6877,6 +7045,11 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, queue->last_waited_fence = NULL; queue->last_waited_fence_value = 0;
+ queue->ops = NULL; + queue->ops_count = 0; + queue->ops_size = 0; + queue->is_flushing = false; + if (desc->Priority == D3D12_COMMAND_QUEUE_PRIORITY_GLOBAL_REALTIME) { FIXME("Global realtime priority is not implemented.\n"); @@ -6891,15 +7064,24 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, if (FAILED(hr = vkd3d_private_store_init(&queue->private_store))) return hr;
- if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) + if ((rc = vkd3d_mutex_init(&queue->op_mutex)) < 0) { - vkd3d_private_store_destroy(&queue->private_store); - return hr; + hr = hresult_from_errno(rc); + goto fail_destroy_private_store; }
+ if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) + goto fail_destroy_op_mutex; + d3d12_device_add_ref(queue->device = device);
return S_OK; + +fail_destroy_op_mutex: + vkd3d_mutex_destroy(&queue->op_mutex); +fail_destroy_private_store: + vkd3d_private_store_destroy(&queue->private_store); + return hr; }
HRESULT d3d12_command_queue_create(struct d3d12_device *device, @@ -6934,8 +7116,12 @@ uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue) VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue) { struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(queue); + VkQueue vk_queue = vkd3d_queue_acquire(d3d12_queue->vkd3d_queue); + + if (d3d12_queue->ops_count) + WARN("Acquired command queue %p with %zu remaining ops.\n", d3d12_queue, d3d12_queue->ops_count);
- return vkd3d_queue_acquire(d3d12_queue->vkd3d_queue); + return vk_queue; }
void vkd3d_release_vk_queue(ID3D12CommandQueue *queue) diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 5f8108ec..eaedc444 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -747,7 +747,6 @@ struct vkd3d_physical_device_info VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; - VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
VkPhysicalDeviceProperties2KHR properties2;
@@ -772,7 +771,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; - VkPhysicalDeviceTimelineSemaphorePropertiesKHR *timeline_semaphore_properties; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; @@ -799,7 +797,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vertex_divisor_features = &info->vertex_divisor_features; vertex_divisor_properties = &info->vertex_divisor_properties; timeline_semaphore_features = &info->timeline_semaphore_features; - timeline_semaphore_properties = &info->timeline_semaphore_properties; xfb_features = &info->xfb_features; xfb_properties = &info->xfb_properties;
@@ -841,8 +838,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->properties2, xfb_properties); vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; vk_prepend_struct(&info->properties2, vertex_divisor_properties); - timeline_semaphore_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR; - vk_prepend_struct(&info->properties2, timeline_semaphore_properties);
if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); @@ -1431,7 +1426,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1); - vulkan_info->timeline_semaphore_properties = physical_device_info->timeline_semaphore_properties;
device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64; device->feature_options.OutputMergerLogicOp = features->logicOp; @@ -1908,75 +1902,6 @@ static bool d3d12_is_64k_msaa_supported(struct d3d12_device *device) && info.Alignment <= 0x10000; }
-/* A lower value can be signalled on a D3D12 fence. Vulkan timeline semaphores - * do not support this, but test if it works anyway. */ -static bool d3d12_is_timeline_semaphore_supported(const struct d3d12_device *device) -{ - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; - VkSemaphore timeline_semaphore; - VkSubmitInfo submit_info; - bool result = false; - uint64_t value = 0; - VkQueue vk_queue; - VkResult vr; - - if (!device->vk_info.KHR_timeline_semaphore) - return false; - - if ((vr = vkd3d_create_timeline_semaphore(device, 1, &timeline_semaphore)) < 0) - { - WARN("Failed to create timeline semaphore, vr %d.\n", vr); - return false; - } - - if (!(vk_queue = vkd3d_queue_acquire(device->direct_queue))) - { - ERR("Failed to acquire queue %p.\n", device->direct_queue); - VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL)); - return false; - } - - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = &timeline_submit_info; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = NULL; - submit_info.pWaitDstStageMask = NULL; - submit_info.commandBufferCount = 0; - submit_info.pCommandBuffers = NULL; - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &timeline_semaphore; - - timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; - timeline_submit_info.pNext = NULL; - timeline_submit_info.pSignalSemaphoreValues = &value; - timeline_submit_info.signalSemaphoreValueCount = 1; - timeline_submit_info.waitSemaphoreValueCount = 0; - timeline_submit_info.pWaitSemaphoreValues = NULL; - - vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE)); - - if (vr >= 0) - { - if ((vr = VK_CALL(vkQueueWaitIdle(vk_queue))) < 0) - WARN("Failed to wait for queue, vr %d.\n", vr); - - if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, timeline_semaphore, &value))) < 0) - ERR("Failed 
to get Vulkan semaphore status, vr %d.\n", vr); - else if (!(result = !value)) - WARN("Disabling timeline semaphore use due to incompatible behaviour.\n"); - } - else - { - WARN("Failed to submit signal operation, vr %d.\n", vr); - } - - vkd3d_queue_release(device->direct_queue); - VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL)); - - return result; -} - static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info) { @@ -2075,10 +2000,6 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, }
device->feature_options4.MSAA64KBAlignedTextureSupported = d3d12_is_64k_msaa_supported(device); - device->use_timeline_semaphores = d3d12_is_timeline_semaphore_supported(device) - && vkd3d_queue_init_timeline_semaphore(device->direct_queue, device) - && vkd3d_queue_init_timeline_semaphore(device->compute_queue, device) - && vkd3d_queue_init_timeline_semaphore(device->copy_queue, device);
TRACE("Created Vulkan device %p.\n", vk_device);
@@ -4362,6 +4283,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); vkd3d_time_domains_init(device);
+ device->blocked_queue_count = 0; + for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) vkd3d_mutex_init(&device->desc_mutex[i]);
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 4e03145d..f00181a2 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -59,7 +59,7 @@ #define VKD3D_MAX_SHADER_EXTENSIONS 3u #define VKD3D_MAX_SHADER_STAGES 5u #define VKD3D_MAX_VK_SYNC_OBJECTS 4u -#define VKD3D_MAX_FENCE_WAITING_QUEUES 4u +#define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u #define VKD3D_MAX_DESCRIPTOR_SETS 64u /* D3D12 binding tier 3 has a limit of 2048 samplers. */ #define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u @@ -152,8 +152,6 @@ struct vkd3d_vulkan_info
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
- VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties; - unsigned int shader_extension_count; enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS];
@@ -502,15 +500,17 @@ HRESULT vkd3d_set_private_data_interface(struct vkd3d_private_store *store, cons struct vkd3d_signaled_semaphore { uint64_t value; - VkSemaphore vk_semaphore; - VkFence vk_fence; - bool is_acquired; -}; - -struct vkd3d_pending_fence_wait -{ - const struct vkd3d_queue *queue; - uint64_t pending_value; + union + { + struct + { + VkSemaphore vk_semaphore; + VkFence vk_fence; + bool is_acquired; + } binary; + uint64_t timeline_value; + } u; + const struct vkd3d_queue *signalling_queue; };
/* ID3D12Fence */ @@ -521,6 +521,7 @@ struct d3d12_fence LONG refcount;
uint64_t value; + uint64_t max_pending_value; struct vkd3d_mutex mutex; struct vkd3d_cond null_event_cond;
@@ -534,9 +535,8 @@ struct d3d12_fence size_t event_count;
VkSemaphore timeline_semaphore; + uint64_t timeline_value; uint64_t pending_timeline_value; - struct vkd3d_pending_fence_wait gpu_waits[VKD3D_MAX_FENCE_WAITING_QUEUES]; - unsigned int gpu_wait_count;
struct vkd3d_signaled_semaphore *semaphores; size_t semaphores_size; @@ -1294,9 +1294,6 @@ struct vkd3d_queue VkQueueFlags vk_queue_flags; uint32_t timestamp_bits;
- VkSemaphore wait_completion_semaphore; - uint64_t pending_wait_completion_value; - struct { VkSemaphore vk_semaphore; @@ -1311,10 +1308,45 @@ struct vkd3d_queue VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); -bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue);
+enum vkd3d_cs_op +{ + VKD3D_CS_OP_WAIT, + VKD3D_CS_OP_SIGNAL, + VKD3D_CS_OP_EXECUTE, +}; + +struct vkd3d_cs_wait +{ + struct d3d12_fence *fence; + uint64_t value; +}; + +struct vkd3d_cs_signal +{ + struct d3d12_fence *fence; + uint64_t value; +}; + +struct vkd3d_cs_execute +{ + VkCommandBuffer *buffers; + unsigned int buffer_count; +}; + +struct vkd3d_cs_op_data +{ + enum vkd3d_cs_op opcode; + union + { + struct vkd3d_cs_wait wait; + struct vkd3d_cs_signal signal; + struct vkd3d_cs_execute execute; + } u; +}; + /* ID3D12CommandQueue */ struct d3d12_command_queue { @@ -1331,6 +1363,12 @@ struct d3d12_command_queue
struct d3d12_device *device;
+ struct vkd3d_mutex op_mutex; + struct vkd3d_cs_op_data *ops; + size_t ops_count; + size_t ops_size; + bool is_flushing; + struct vkd3d_private_store private_store; };
@@ -1452,6 +1490,9 @@ struct d3d12_device unsigned int queue_family_count; VkTimeDomainEXT vk_host_time_domain;
+ struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES]; + unsigned int blocked_queue_count; + struct vkd3d_instance *vkd3d_instance;
IUnknown *parent; @@ -1470,7 +1511,6 @@ struct d3d12_device VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; bool use_vk_heaps; - bool use_timeline_semaphores; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance, diff --git a/tests/d3d12.c b/tests/d3d12.c index 015c3122..5f83a373 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -33224,9 +33224,7 @@ static void test_queue_wait(void) command_list = context.list; queue = context.queue;
- /* 'queue2' must not map to the same command queue as 'queue', or Wait() before GPU signal will fail. - * Using a compute queue fixes this on most hardware, but it may still fail on low spec hardware. */ - queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL); + queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
event = create_event(); ok(event, "Failed to create event.\n");
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>
D3D12 guarantees no overlap between commands sent in separate calls to ExecuteCommandLists(). A Vulkan noop wait provides a barrier against reordering.
Signed-off-by: Conor McCarthy <cmccarthy@codeweavers.com> --- libs/vkd3d/command.c | 18 ++++++++++++++++++ libs/vkd3d/device.c | 13 +++++++++++++ libs/vkd3d/vkd3d_private.h | 2 ++ 3 files changed, 33 insertions(+)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index d0782e5a..a44eb02f 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -6285,9 +6285,13 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, VkCommandBuffer *buffers, unsigned int count) { + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue; + VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; + struct d3d12_device *device = command_queue->device; VkSubmitInfo submit_desc; + uint64_t noop_value = 0; VkQueue vk_queue; VkResult vr;
@@ -6300,6 +6304,20 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu }
submit_desc.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + if (device->vk_info.KHR_timeline_semaphore) + { + /* Insert a noop wait to provide the barrier between executions which D3D12 guarantees. + * The limitations of binary semaphores would make this complex to do with them. */ + submit_desc.pNext = &timeline_submit_info; + submit_desc.pWaitSemaphores = &device->noop_semaphore; + submit_desc.waitSemaphoreCount = 1; + submit_desc.pWaitDstStageMask = &wait_stage_mask; + + memset(&timeline_submit_info, 0, sizeof(timeline_submit_info)); + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; + timeline_submit_info.waitSemaphoreValueCount = 1; + timeline_submit_info.pWaitSemaphoreValues = &noop_value; + } submit_desc.commandBufferCount = count; submit_desc.pCommandBuffers = buffers;
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index eaedc444..bdc80ba3 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2626,6 +2626,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_private_store_destroy(&device->private_store);
vkd3d_cleanup_format_info(device); + VK_CALL(vkDestroySemaphore(device->vk_device, device->noop_semaphore, NULL)); vkd3d_vk_descriptor_heap_layouts_cleanup(device); vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); vkd3d_destroy_null_resources(&device->null_resources, device); @@ -4241,6 +4242,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info) { const struct vkd3d_vk_device_procs *vk_procs; + VkResult vr; HRESULT hr; size_t i;
@@ -4278,6 +4280,15 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device))) goto out_cleanup_uav_clear_state;
+ device->noop_semaphore = VK_NULL_HANDLE; + if (device->vk_info.KHR_timeline_semaphore && (vr = vkd3d_create_timeline_semaphore(device, 0, + &device->noop_semaphore)) < 0) + { + WARN("Failed to create timeline semaphore, vr %d.\n", vr); + hr = hresult_from_vk_result(vr); + goto out_cleanup_descriptor_heap_layouts; + } + vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); @@ -4295,6 +4306,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
return S_OK;
+out_cleanup_descriptor_heap_layouts: + vkd3d_vk_descriptor_heap_layouts_cleanup(device); out_cleanup_uav_clear_state: vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); out_destroy_null_resources: diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index f00181a2..37bac159 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1493,6 +1493,8 @@ struct d3d12_device struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES]; unsigned int blocked_queue_count;
+ VkSemaphore noop_semaphore; + struct vkd3d_instance *vkd3d_instance;
IUnknown *parent;
Descriptor read/write race conditions in SotTR were a result of deficiencies in the fence implementation.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/device.c | 50 ++++++++------------ libs/vkd3d/resource.c | 97 ++++++++++++-------------------------- libs/vkd3d/vkd3d_private.h | 17 +------ 3 files changed, 51 insertions(+), 113 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index bdc80ba3..e2533598 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2615,7 +2615,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) { struct d3d12_device *device = impl_from_ID3D12Device(iface); ULONG refcount = InterlockedDecrement(&device->refcount); - size_t i;
TRACE("%p decreasing refcount to %u.\n", device, refcount);
@@ -2635,8 +2634,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_destroy(&device->desc_mutex[i]); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -3457,42 +3454,45 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device * static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { + struct d3d12_desc *dst = d3d12_desc_from_cpu_handle(descriptor); struct d3d12_device *device = impl_from_ID3D12Device(iface); - struct d3d12_desc tmp = {0};
TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
- d3d12_desc_create_cbv(&tmp, device, desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + d3d12_desc_create_cbv(dst, device, desc); + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { + struct d3d12_desc *dst = d3d12_desc_from_cpu_handle(descriptor); struct d3d12_device *device = impl_from_ID3D12Device(iface); - struct d3d12_desc tmp = {0};
TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", iface, resource, desc, descriptor.ptr);
- d3d12_desc_create_srv(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + d3d12_desc_create_srv(dst, device, unsafe_impl_from_ID3D12Resource(resource), desc); + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, ID3D12Resource *resource, ID3D12Resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { + struct d3d12_desc *dst = d3d12_desc_from_cpu_handle(descriptor); struct d3d12_device *device = impl_from_ID3D12Device(iface); - struct d3d12_desc tmp = {0};
TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", iface, resource, counter_resource, desc, descriptor.ptr);
- d3d12_desc_create_uav(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), + d3d12_desc_create_uav(dst, device, unsafe_impl_from_ID3D12Resource(resource), unsafe_impl_from_ID3D12Resource(counter_resource), desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, @@ -3520,13 +3520,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device * static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { + struct d3d12_desc *sampler = d3d12_desc_from_cpu_handle(descriptor); struct d3d12_device *device = impl_from_ID3D12Device(iface); - struct d3d12_desc tmp = {0};
TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
- d3d12_desc_create_sampler(&tmp, device, desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + d3d12_desc_create_sampler(sampler, device, desc); + if (device->use_vk_heaps && sampler->magic) + d3d12_desc_write_vk_heap(sampler, device); }
static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], @@ -3543,23 +3544,16 @@ static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_ } }
-static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, +static void d3d12_desc_buffered_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) { struct d3d12_desc_copy_location *location; enum vkd3d_vk_descriptor_set_index set; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex);
if (src->magic == VKD3D_DESCRIPTOR_MAGIC_FREE) { - /* Source must be unlocked first, and therefore can't be used as a null source. */ - static const struct d3d12_desc null = {0}; - vkd3d_mutex_unlock(mutex); - d3d12_desc_write_atomic(dst, &null, device); + d3d12_desc_destroy(dst, device); return; }
@@ -3571,8 +3565,6 @@ static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct if (location->src.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) vkd3d_view_incref(location->src.u.view_info.view);
- vkd3d_mutex_unlock(mutex); - infos[set].uav_counter |= (location->src.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) && !!location->src.u.view_info.view->vk_counter_view; location->dst = dst; @@ -3637,7 +3629,7 @@ static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, if (dst[dst_idx].magic == src[src_idx].magic && (dst[dst_idx].magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) && dst[dst_idx].u.view_info.written_serial_id == src[src_idx].u.view_info.view->serial_id) continue; - d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); + d3d12_desc_buffered_copy(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); }
if (dst_idx >= dst_range_size) @@ -4244,7 +4236,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, const struct vkd3d_vk_device_procs *vk_procs; VkResult vr; HRESULT hr; - size_t i;
device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; @@ -4296,9 +4287,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
device->blocked_queue_count = 0;
- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_init(&device->desc_mutex[i]); - vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits);
if ((device->parent = create_info->parent)) diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 68c28cd1..df633afd 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2098,9 +2098,13 @@ void vkd3d_view_incref(struct vkd3d_view *view) InterlockedIncrement(&view->refcount); }
-static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) +static void vkd3d_view_decref_descriptor(struct vkd3d_view *view, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + ULONG refcount = InterlockedDecrement(&view->refcount); + + if (refcount) + return;
TRACE("Destroying view %p.\n", view);
@@ -2127,8 +2131,7 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev
void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) { - if (!InterlockedDecrement(&view->refcount)) - vkd3d_view_destroy(view, device); + vkd3d_view_decref_descriptor(view, device); }
/* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). */ @@ -2220,24 +2223,21 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea } }
-/* dst and src contain the same data unless another thread overwrites dst. The array index is - * calculated from dst, and src is thread safe. */ -static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) +void d3d12_desc_write_vk_heap(const struct d3d12_desc *src, struct d3d12_device *device) { struct d3d12_descriptor_heap_vk_set *descriptor_set; struct d3d12_descriptor_heap *descriptor_heap; const struct vkd3d_vk_device_procs *vk_procs; bool is_null = false;
- descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, dst); + descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, src); descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( src->vk_descriptor_type)]; vk_procs = &device->vk_procs;
vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex);
- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst + descriptor_set->vk_descriptor_writes[0].dstArrayElement = src - (const struct d3d12_desc *)descriptor_heap->descriptors; descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; switch (src->vk_descriptor_type) @@ -2275,7 +2275,7 @@ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view_info.view->vk_counter_view) { descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst + descriptor_set->vk_descriptor_writes[0].dstArrayElement = src - (const struct d3d12_desc *)descriptor_heap->descriptors; descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view_info.view->vk_counter_view; @@ -2285,60 +2285,22 @@ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); }
-static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) -{ - struct vkd3d_view *defunct_view; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); - - if (!(dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->u.view_info.view->refcount)) - { - *dst = *src; - vkd3d_mutex_unlock(mutex); - return; - } - - defunct_view = dst->u.view_info.view; - *dst = *src; - vkd3d_mutex_unlock(mutex); - - /* Destroy the view after unlocking to reduce wait time. */ - vkd3d_view_destroy(defunct_view, device); -} - -void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, +static void d3d12_desc_copy_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { - struct vkd3d_view *defunct_view = NULL; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); + assert(dst != src);
- /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ - if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && !InterlockedDecrement(&dst->u.view_info.view->refcount)) - defunct_view = dst->u.view_info.view; + d3d12_desc_destroy(dst, device);
*dst = *src; - - vkd3d_mutex_unlock(mutex); - - /* Destroy the view after unlocking to reduce wait time. */ - if (defunct_view) - vkd3d_view_destroy(defunct_view, device); - - if (device->use_vk_heaps && dst->magic) - d3d12_desc_write_vk_heap(dst, src, device); }
-static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) { - static const struct d3d12_desc null_desc = {0}; + if (descriptor->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_decref_descriptor(descriptor->u.view_info.view, device);
- d3d12_desc_write_atomic(descriptor, &null_desc, device); + memset(descriptor, 0, sizeof(*descriptor)); }
void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, @@ -2353,7 +2315,7 @@ void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, c
for (i = 0, write_count = 0; i < info->count; ++i) { - d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); + d3d12_desc_copy_d3d12_only(locations[i].dst, &locations[i].src, device);
if (i && locations[i].dst == locations[i - 1].dst + 1) { @@ -2394,24 +2356,17 @@ done: void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { - struct d3d12_desc tmp; - struct vkd3d_mutex *mutex; - assert(dst != src);
- /* Shadow of the Tomb Raider and possibly other titles sometimes destroy - * and rewrite a descriptor in another thread while it is being copied. */ - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); + d3d12_desc_destroy(dst, device);
if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) vkd3d_view_incref(src->u.view_info.view);
- tmp = *src; - - vkd3d_mutex_unlock(mutex); + *dst = *src;
- d3d12_desc_write_atomic(dst, &tmp, device); + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, device); }
static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, @@ -2813,6 +2768,8 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct VkDescriptorBufferInfo *buffer_info; struct d3d12_resource *resource;
+ d3d12_desc_destroy(descriptor, device); + if (!desc) { WARN("Constant buffer desc is NULL.\n"); @@ -2988,6 +2945,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, struct vkd3d_texture_view_desc vkd3d_desc; struct vkd3d_view *view;
+ d3d12_desc_destroy(descriptor, device); + if (!resource) { vkd3d_create_null_srv(descriptor, device, desc); @@ -3286,6 +3245,8 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d struct d3d12_resource *resource, struct d3d12_resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { + d3d12_desc_destroy(descriptor, device); + if (!resource) { if (counter_resource) @@ -3416,6 +3377,8 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, { struct vkd3d_view *view;
+ d3d12_desc_destroy(sampler, device); + if (!desc) { WARN("NULL sampler desc.\n"); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 37bac159..e2ab8eac 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -746,7 +746,8 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d struct d3d12_resource *resource, struct d3d12_resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc); void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device *device, const D3D12_SAMPLER_DESC *desc); -void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_write_vk_heap(const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device);
bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view); @@ -1468,7 +1469,6 @@ struct d3d12_device struct vkd3d_gpu_va_allocator gpu_va_allocator;
struct vkd3d_mutex mutex; - struct vkd3d_mutex desc_mutex[8]; struct vkd3d_render_pass_cache render_pass_cache; VkPipelineCache vk_pipeline_cache;
@@ -1544,19 +1544,6 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); }
-static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, - const struct d3d12_desc *descriptor) -{ - STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1))); - uintptr_t idx = (uintptr_t)descriptor; - - idx ^= idx >> 12; - idx ^= idx >> 6; - idx ^= idx >> 3; - - return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; -} - /* utils */ enum vkd3d_format_type {
On Fri, May 13, 2022 at 1:11 AM Conor McCarthy cmccarthy@codeweavers.com wrote:
Descriptor read/write race conditions in SotTR were a result of deficiencies in the fence implementation.
There is still the occasional crash on startup, shutdown, or between benchmark scenes. It always seems to be caused by invalid view refcounts, so the mutexes are still needed. Most likely two descriptors are copied to the same destination concurrently, which may not break anything on Windows if they are of the same type.