From: Conor McCarthy <cmccarthy@codeweavers.com>
D3D12 supports signalling a fence to a lower value, while Vulkan timeline semaphores do not. On the GPU side this is handled by simply submitting the signal anyway, provided a test for this behaviour passes on device creation, because working around it there is impractical. For CPU signals, the Vulkan semaphore is replaced with a new one at the lower value, but only if no waits and/or signals are pending on the GPU; otherwise a fixme is emitted.
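To illustrate the mismatch, a standalone sketch (not part of the patch; the helper name and values are invented, and it assumes the usual d3d12.h C bindings plus vkd3d's VK_CALL/vk_procs conventions):

static void fence_rewind_sketch(ID3D12Fence *fence, VkDevice vk_device, VkSemaphore vk_semaphore,
        const struct vkd3d_vk_device_procs *vk_procs)
{
    VkSemaphoreSignalInfoKHR info;

    /* D3D12 allows rewinding a fence to a lower value... */
    ID3D12Fence_Signal(fence, 5);
    ID3D12Fence_Signal(fence, 1);

    /* ...but a Vulkan timeline semaphore payload must strictly increase, so the
     * equivalent signal below is invalid while the current payload is 5. */
    info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR;
    info.pNext = NULL;
    info.semaphore = vk_semaphore;
    info.value = 1;
    VK_CALL(vkSignalSemaphoreKHR(vk_device, &info));
}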
Partly based on a vkd3d-proton patch by Hans-Kristian Arntzen (not including the handling of lower fence values).
The old implementation is used if KHR_timeline_semaphore is not available or GPU signals do not work for a lower value.
Signed-off-by: Conor McCarthy <cmccarthy@codeweavers.com>
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>
---
This supersedes patch 230633.

v4:
- Fix the (trivial) merge conflict with patch 230641.
- Simplify setting "device->use_timeline_semaphores".
 libs/vkd3d/command.c       | 571 ++++++++++++++++++++++++++++++++++---
 libs/vkd3d/device.c        |  87 ++++++
 libs/vkd3d/vkd3d_private.h |  30 ++
 libs/vkd3d/vulkan_procs.h  |   5 +
 tests/d3d12.c              |  11 +-
 5 files changed, 663 insertions(+), 41 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 952675cf..09171fe4 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -46,6 +46,9 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, object->vk_queue_flags = properties->queueFlags; object->timestamp_bits = properties->timestampValidBits;
+ object->wait_completion_semaphore = VK_NULL_HANDLE; + object->pending_wait_completion_value = 0; + object->semaphores = NULL; object->semaphores_size = 0; object->semaphore_count = 0; @@ -61,6 +64,20 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, return S_OK; }
+bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device) +{ + VkResult vr; + + if (!queue->wait_completion_semaphore + && (vr = vkd3d_create_timeline_semaphore(device, 0, &queue->wait_completion_semaphore)) < 0) + { + WARN("Failed to create timeline semaphore, vr %d.\n", vr); + return false; + } + + return true; +} + void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -75,6 +92,8 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
vkd3d_free(queue->semaphores);
+ VK_CALL(vkDestroySemaphore(device->vk_device, queue->wait_completion_semaphore, NULL)); + for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i) { if (queue->old_vk_semaphores[i]) @@ -268,6 +287,7 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker, }
worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence; + worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = VK_NULL_HANDLE; waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence; waiting_fence->fence = fence; waiting_fence->value = value; @@ -317,6 +337,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker) { unsigned int i; + bool timeline; size_t count; bool ret;
@@ -325,8 +346,18 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
count = worker->fence_count + worker->enqueued_fence_count;
- ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size, - count, sizeof(*worker->vk_fences)); + if ((timeline = worker->device->use_timeline_semaphores)) + { + ret = vkd3d_array_reserve((void **) &worker->vk_semaphores, &worker->vk_semaphores_size, + count, sizeof(*worker->vk_semaphores)); + ret &= vkd3d_array_reserve((void **) &worker->semaphore_wait_values, &worker->semaphore_wait_values_size, + count, sizeof(*worker->semaphore_wait_values)); + } + else + { + ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size, + count, sizeof(*worker->vk_fences)); + } ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size, count, sizeof(*worker->fences)); if (!ret) @@ -339,7 +370,16 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo { struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i];
- worker->vk_fences[worker->fence_count] = current->vk_fence; + if (timeline) + { + worker->vk_semaphores[worker->fence_count] = current->vk_semaphore; + worker->semaphore_wait_values[worker->fence_count] = current->waiting_fence.value; + } + else + { + worker->vk_fences[worker->fence_count] = current->vk_fence; + } + worker->fences[worker->fence_count] = current->waiting_fence; ++worker->fence_count; } @@ -347,6 +387,66 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo worker->enqueued_fence_count = 0; }
+static void vkd3d_wait_for_gpu_timeline_semaphores(struct vkd3d_fence_worker *worker) +{ + const struct d3d12_device *device = worker->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSemaphoreWaitInfoKHR wait_info; + VkSemaphore vk_semaphore; + uint64_t counter_value; + unsigned int i, j; + HRESULT hr; + int vr; + + if (!worker->fence_count) + return; + + wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR; + wait_info.pNext = NULL; + wait_info.flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR; + wait_info.pSemaphores = worker->vk_semaphores; + wait_info.semaphoreCount = worker->fence_count; + wait_info.pValues = worker->semaphore_wait_values; + + vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, ~(uint64_t)0)); + if (vr == VK_TIMEOUT) + return; + if (vr != VK_SUCCESS) + { + ERR("Failed to wait for Vulkan timeline semaphores, vr %d.\n", vr); + return; + } + + for (i = 0, j = 0; i < worker->fence_count; ++i) + { + struct vkd3d_waiting_fence *current = &worker->fences[i]; + + vk_semaphore = worker->vk_semaphores[i]; + if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, vk_semaphore, &counter_value))) < 0) + { + ERR("Failed to get Vulkan semaphore value, vr %d.\n", vr); + } + else if (counter_value >= current->value) + { + TRACE("Signaling fence %p value %#"PRIx64".\n", current->fence, current->value); + if (FAILED(hr = d3d12_fence_signal(current->fence, counter_value, VK_NULL_HANDLE))) + ERR("Failed to signal D3D12 fence, hr %#x.\n", hr); + + InterlockedDecrement(&current->fence->pending_worker_operation_count); + continue; + } + + if (i != j) + { + worker->vk_semaphores[j] = worker->vk_semaphores[i]; + worker->semaphore_wait_values[j] = worker->semaphore_wait_values[i]; + worker->fences[j] = worker->fences[i]; + } + ++j; + } + worker->fence_count = j; +} + static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker) { struct d3d12_device *device = worker->device; @@ -408,7 +508,7 @@ static void *vkd3d_fence_worker_main(void *arg)
for (;;) { - vkd3d_wait_for_gpu_fences(worker); + worker->wait_for_gpu_fences(worker);
if (!worker->fence_count || InterlockedAdd(&worker->enqueued_fence_count, 0)) { @@ -473,6 +573,13 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, worker->vk_fences_size = 0; worker->fences = NULL; worker->fences_size = 0; + worker->vk_semaphores = NULL; + worker->vk_semaphores_size = 0; + worker->semaphore_wait_values = NULL; + worker->semaphore_wait_values_size = 0; + + worker->wait_for_gpu_fences = device->use_timeline_semaphores + ? vkd3d_wait_for_gpu_timeline_semaphores : vkd3d_wait_for_gpu_fences;
if ((rc = vkd3d_mutex_init(&worker->mutex))) { @@ -535,6 +642,8 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, vkd3d_free(worker->enqueued_fences); vkd3d_free(worker->vk_fences); vkd3d_free(worker->fences); + vkd3d_free(worker->vk_semaphores); + vkd3d_free(worker->semaphore_wait_values);
return S_OK; } @@ -684,6 +793,7 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) }
d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); + VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
vkd3d_mutex_unlock(&fence->mutex); } @@ -802,31 +912,21 @@ static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence, return hr; }
-static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence) +static bool d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence) { struct d3d12_device *device = fence->device; - struct vkd3d_signaled_semaphore *current; bool signal_null_event_cond = false; unsigned int i, j; - int rc; - - if ((rc = vkd3d_mutex_lock(&fence->mutex))) - { - ERR("Failed to lock mutex, error %d.\n", rc); - return hresult_from_errno(rc); - } - - fence->value = value;
for (i = 0, j = 0; i < fence->event_count; ++i) { struct vkd3d_waiting_event *current = &fence->events[i];
- if (current->value <= value) + if (current->value <= fence->value) { if (current->event) { - fence->device->signal_event(current->event); + device->signal_event(current->event); } else { @@ -841,9 +941,28 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF ++j; } } + fence->event_count = j;
- if (signal_null_event_cond) + return signal_null_event_cond; +} + +static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence) +{ + struct d3d12_device *device = fence->device; + struct vkd3d_signaled_semaphore *current; + unsigned int i; + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + fence->value = value; + + if (d3d12_fence_signal_external_events_locked(fence)) vkd3d_cond_broadcast(&fence->null_event_cond);
if (vk_fence) @@ -1069,12 +1188,160 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i return S_OK; }
+static inline bool d3d12_fence_gpu_wait_is_completed(const struct d3d12_fence *fence, unsigned int i) +{ + const struct d3d12_device *device = fence->device; + const struct vkd3d_vk_device_procs *vk_procs; + uint64_t value; + VkResult vr; + + vk_procs = &device->vk_procs; + + if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, + fence->gpu_waits[i].queue->wait_completion_semaphore, &value))) >= 0) + { + return value >= fence->gpu_waits[i].pending_value; + } + + ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr); + return true; +} + +static inline bool d3d12_fence_has_pending_gpu_ops_locked(struct d3d12_fence *fence) +{ + const struct d3d12_device *device = fence->device; + const struct vkd3d_vk_device_procs *vk_procs; + uint64_t value; + unsigned int i; + VkResult vr; + + for (i = 0; i < fence->gpu_wait_count; ++i) + { + if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count) + fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count]; + } + if (fence->gpu_wait_count) + return true; + + /* Check for pending signals too. */ + if (fence->value >= fence->pending_timeline_value) + return false; + + vk_procs = &device->vk_procs; + + /* Check the actual semaphore value in case fence->value update is lagging. */ + if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, fence->timeline_semaphore, &value))) < 0) + { + ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr); + return false; + } + + return value < fence->pending_timeline_value; +} + +/* Replace the VkSemaphore with a new one to allow a lower value to be set. Ideally apps will + * only use this to reset the fence when no operations are pending on the queue. */ +static HRESULT d3d12_fence_reinit_timeline_semaphore_locked(struct d3d12_fence *fence, uint64_t value) +{ + const struct d3d12_device *device = fence->device; + const struct vkd3d_vk_device_procs *vk_procs; + VkSemaphore timeline_semaphore; + VkResult vr; + + if (d3d12_fence_has_pending_gpu_ops_locked(fence)) + { + /* This situation is not very likely because it means a fence with pending waits and/or signals was + * signalled on the CPU to a lower value. For now, emit a fixme so it can be patched if necessary. + * A patch already exists for this but it's not pretty. */ + FIXME("Unable to re-initialise timeline semaphore to a lower value due to pending GPU ops.\n"); + return E_FAIL; + } + + if ((vr = vkd3d_create_timeline_semaphore(device, value, &timeline_semaphore)) < 0) + { + WARN("Failed to create timeline semaphore, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + fence->value = value; + fence->pending_timeline_value = value; + + WARN("Replacing timeline semaphore with a new object.\n"); + + vk_procs = &device->vk_procs; + + VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL)); + fence->timeline_semaphore = timeline_semaphore; + + return S_OK; +} + +static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value) +{ + const struct d3d12_device *device = fence->device; + VkSemaphoreSignalInfoKHR info; + HRESULT hr = S_OK; + VkResult vr; + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + /* We must only signal a value which is greater than the current value. + * That value can be in the range of current known value (fence->value), or as large as pending_timeline_value. 
+ * Pending timeline value signal might be blocked by another synchronization primitive, and thus statically + * cannot be that value, so the safest thing to do is to check the current value which is updated by the fence + * wait thread continuously. This check is technically racy since the value might be immediately out of date, + * but there is no way to avoid this. */ + if (value > fence->value) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + /* Sanity check against the delta limit. */ + if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference) + { + FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n", + value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference); + } + + info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR; + info.pNext = NULL; + info.semaphore = fence->timeline_semaphore; + info.value = value; + if ((vr = VK_CALL(vkSignalSemaphoreKHR(device->vk_device, &info))) >= 0) + { + fence->value = value; + if (value > fence->pending_timeline_value) + fence->pending_timeline_value = value; + } + else + { + ERR("Failed to signal timeline semaphore, vr %d.\n", vr); + hr = hresult_from_vk_result(vr); + } + } + else if (value < fence->value) + { + hr = d3d12_fence_reinit_timeline_semaphore_locked(fence, value); + } + + d3d12_fence_signal_external_events_locked(fence); + + vkd3d_mutex_unlock(&fence->mutex); + return hr; +} + static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) { struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
TRACE("iface %p, value %#"PRIx64".\n", iface, value);
+ if (fence->timeline_semaphore) + return d3d12_fence_signal_cpu_timeline_semaphore(fence, value); return d3d12_fence_signal(fence, value, VK_NULL_HANDLE); }
@@ -1108,6 +1375,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, UINT64 initial_value, D3D12_FENCE_FLAGS flags) { + VkResult vr; HRESULT hr; int rc;
@@ -1136,6 +1404,16 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * fence->events_size = 0; fence->event_count = 0;
+ fence->timeline_semaphore = VK_NULL_HANDLE; + if (device->use_timeline_semaphores && (vr = vkd3d_create_timeline_semaphore(device, initial_value, + &fence->timeline_semaphore)) < 0) + { + WARN("Failed to create timeline semaphore, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + fence->pending_timeline_value = initial_value; + fence->gpu_wait_count = 0; + list_init(&fence->semaphores); fence->semaphore_count = 0;
@@ -1172,6 +1450,25 @@ HRESULT d3d12_fence_create(struct d3d12_device *device, return S_OK; }
+VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value, + VkSemaphore *timeline_semaphore) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSemaphoreTypeCreateInfoKHR type_info; + VkSemaphoreCreateInfo info; + + info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + info.pNext = &type_info; + info.flags = 0; + + type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR; + type_info.pNext = NULL; + type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR; + type_info.initialValue = initial_value; + + return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore)); +} + /* Command buffers */ static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list, const char *message, ...) @@ -6138,18 +6435,88 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i FIXME("iface %p stub!\n", iface); }
+static HRESULT d3d12_fence_update_gpu_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t value) +{ + const struct d3d12_device *device = fence->device; + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + /* If we're attempting to async signal a fence with a value which is not strictly increasing the payload value, + * warn about this case. Do not treat this as an error since it works at least with RADV and Nvidia drivers and + * there's no workaround on the GPU side. */ + if (value <= fence->pending_timeline_value) + { + WARN("Fence %p values are not strictly increasing. Pending values: old %"PRIu64", new %"PRIu64".\n", + fence, fence->pending_timeline_value, value); + } + /* Sanity check against the delta limit. Use the current fence value. */ + else if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference) + { + FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n", + value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference); + } + fence->pending_timeline_value = value; + + vkd3d_mutex_unlock(&fence->mutex); + + return S_OK; +} + +static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, VkSemaphore vk_semaphore, + struct d3d12_fence *fence, uint64_t value, struct vkd3d_queue *queue) +{ + struct vkd3d_waiting_fence *waiting_fence; + int rc; + + TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value); + + if ((rc = vkd3d_mutex_lock(&worker->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size, + worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences))) + { + ERR("Failed to add GPU timeline semaphore.\n"); + vkd3d_mutex_unlock(&worker->mutex); + return E_OUTOFMEMORY; + } + + worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = vk_semaphore; + waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence; + waiting_fence->fence = fence; + waiting_fence->value = value; + waiting_fence->queue = queue; + ++worker->enqueued_fence_count; + + InterlockedIncrement(&fence->pending_worker_operation_count); + + vkd3d_cond_signal(&worker->cond); + vkd3d_mutex_unlock(&worker->mutex); + + return S_OK; +} + static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; const struct vkd3d_vk_device_procs *vk_procs; VkSemaphore vk_semaphore = VK_NULL_HANDLE; VkFence vk_fence = VK_NULL_HANDLE; struct vkd3d_queue *vkd3d_queue; + uint64_t sequence_number = 0; struct d3d12_device *device; struct d3d12_fence *fence; VkSubmitInfo submit_info; - uint64_t sequence_number; VkQueue vk_queue; VkResult vr; HRESULT hr; @@ -6162,10 +6529,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
fence = unsafe_impl_from_ID3D12Fence(fence_iface);
- if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0) + if (device->use_timeline_semaphores) { - WARN("Failed to create Vulkan fence, vr %d.\n", vr); - goto fail_vkresult; + if (FAILED(hr = d3d12_fence_update_gpu_signal_timeline_semaphore(fence, value))) + return hr; + + vk_semaphore = fence->timeline_semaphore; + assert(vk_semaphore); + } + else + { + if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0) + { + WARN("Failed to create Vulkan fence, vr %d.\n", vr); + goto fail_vkresult; + } }
if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) @@ -6175,7 +6553,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * goto fail; }
- if ((vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, device, &vk_semaphore)) < 0) + if (!device->use_timeline_semaphores && (vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, + device, &vk_semaphore)) < 0) { ERR("Failed to create Vulkan semaphore, vr %d.\n", vr); vk_semaphore = VK_NULL_HANDLE; @@ -6191,7 +6570,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0; submit_info.pSignalSemaphores = &vk_semaphore;
- if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0) + if (device->use_timeline_semaphores) + { + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; + timeline_submit_info.pNext = NULL; + timeline_submit_info.pSignalSemaphoreValues = &value; + timeline_submit_info.signalSemaphoreValueCount = submit_info.signalSemaphoreCount; + timeline_submit_info.waitSemaphoreValueCount = 0; + timeline_submit_info.pWaitSemaphoreValues = NULL; + submit_info.pNext = &timeline_submit_info; + } + + vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence)); + if (!device->use_timeline_semaphores && vr >= 0) { sequence_number = ++vkd3d_queue->submitted_sequence_number;
@@ -6208,6 +6599,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * goto fail_vkresult; }
+ if (device->use_timeline_semaphores) + return vkd3d_enqueue_timeline_semaphore(&device->fence_worker, vk_semaphore, fence, value, vkd3d_queue); + if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value))) vk_semaphore = VK_NULL_HANDLE;
@@ -6243,32 +6637,27 @@ fail_vkresult: hr = hresult_from_vk_result(vr); fail: VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL)); - VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL)); + if (!device->use_timeline_semaphores) + VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL)); return hr; }
-static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, - ID3D12Fence *fence_iface, UINT64 value) +static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value) { static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; struct vkd3d_signaled_semaphore *semaphore; uint64_t completed_value = 0; struct vkd3d_queue *queue; - struct d3d12_fence *fence; VkSubmitInfo submit_info; VkQueue vk_queue; VkResult vr; HRESULT hr;
- TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); - vk_procs = &command_queue->device->vk_procs; queue = command_queue->vkd3d_queue;
- fence = unsafe_impl_from_ID3D12Fence(fence_iface); - semaphore = d3d12_fence_acquire_vk_semaphore(fence, value, &completed_value); if (!semaphore && completed_value >= value) { @@ -6346,6 +6735,122 @@ fail: return hr; }
+static inline void d3d12_fence_update_gpu_wait(struct d3d12_fence *fence, const struct vkd3d_queue *queue) +{ + unsigned int i; + bool found; + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + for (i = 0, found = false; i < fence->gpu_wait_count; ++i) + { + if (fence->gpu_waits[i].queue == queue) + { + fence->gpu_waits[i].pending_value = queue->pending_wait_completion_value; + found = true; + } + else if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count) + { + fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count]; + } + } + + if (!found) + { + if (fence->gpu_wait_count < ARRAY_SIZE(fence->gpu_waits)) + { + fence->gpu_waits[fence->gpu_wait_count].queue = queue; + fence->gpu_waits[fence->gpu_wait_count++].pending_value = queue->pending_wait_completion_value; + } + else + { + FIXME("Unable to track GPU fence wait.\n"); + } + } + + vkd3d_mutex_unlock(&fence->mutex); +} + +static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value) +{ + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; + const struct vkd3d_vk_device_procs *vk_procs; + struct vkd3d_queue *queue; + VkSubmitInfo submit_info; + VkQueue vk_queue; + VkResult vr; + + vk_procs = &command_queue->device->vk_procs; + queue = command_queue->vkd3d_queue; + + assert(fence->timeline_semaphore); + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; + timeline_submit_info.pNext = NULL; + timeline_submit_info.signalSemaphoreValueCount = 0; + timeline_submit_info.pSignalSemaphoreValues = NULL; + timeline_submit_info.waitSemaphoreValueCount = 1; + timeline_submit_info.pWaitSemaphoreValues = &value; + + if (!(vk_queue = vkd3d_queue_acquire(queue))) + { + ERR("Failed to acquire queue %p.\n", queue); + return E_FAIL; + } + + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = &timeline_submit_info; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &fence->timeline_semaphore; + submit_info.pWaitDstStageMask = &wait_stage_mask; + submit_info.commandBufferCount = 0; + submit_info.pCommandBuffers = NULL; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = NULL; + + ++queue->pending_wait_completion_value; + + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &queue->wait_completion_semaphore; + timeline_submit_info.signalSemaphoreValueCount = 1; + timeline_submit_info.pSignalSemaphoreValues = &queue->pending_wait_completion_value; + + d3d12_fence_update_gpu_wait(fence, queue); + + vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE)); + + vkd3d_queue_release(queue); + + if (vr < 0) + { + WARN("Failed to submit wait operation, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, + ID3D12Fence *fence_iface, UINT64 value) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); + + TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); + + if (command_queue->device->use_timeline_semaphores) + return d3d12_command_queue_wait_timeline_semaphore(command_queue, fence, value); + + 
FIXME_ONCE("KHR_timeline_semaphore is not available or incompatible. Some wait commands may be unsupported.\n"); + return d3d12_command_queue_wait_binary_semaphore(command_queue, fence, value); +} + static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface, UINT64 *frequency) { diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index eb470c12..ab087d60 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -129,6 +129,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), + VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), /* EXT extensions */ VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), @@ -791,6 +792,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; + VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
VkPhysicalDeviceProperties2KHR properties2;
@@ -803,6 +805,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features; + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features;
VkPhysicalDeviceFeatures2 features2; }; @@ -814,11 +817,13 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; + VkPhysicalDeviceTimelineSemaphorePropertiesKHR *timeline_semaphore_properties; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features; VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features; VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; @@ -838,6 +843,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i buffer_alignment_properties = &info->texel_buffer_alignment_properties; vertex_divisor_features = &info->vertex_divisor_features; vertex_divisor_properties = &info->vertex_divisor_properties; + timeline_semaphore_features = &info->timeline_semaphore_features; + timeline_semaphore_properties = &info->timeline_semaphore_properties; xfb_features = &info->xfb_features; xfb_properties = &info->xfb_properties;
@@ -859,6 +866,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->features2, xfb_features); vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; vk_prepend_struct(&info->features2, vertex_divisor_features); + timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; + vk_prepend_struct(&info->features2, timeline_semaphore_features);
if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); @@ -877,6 +886,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->properties2, xfb_properties); vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; vk_prepend_struct(&info->properties2, vertex_divisor_properties); + timeline_semaphore_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR; + vk_prepend_struct(&info->properties2, timeline_semaphore_properties);
if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); @@ -1465,6 +1476,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1); + vulkan_info->timeline_semaphore_properties = physical_device_info->timeline_semaphore_properties;
device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64; device->feature_options.OutputMergerLogicOp = features->logicOp; @@ -1589,6 +1601,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->EXT_shader_demote_to_helper_invocation = false; if (!physical_device_info->texel_buffer_alignment_features.texelBufferAlignment) vulkan_info->EXT_texel_buffer_alignment = false; + if (!physical_device_info->timeline_semaphore_features.timelineSemaphore) + vulkan_info->KHR_timeline_semaphore = false;
vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties;
@@ -1939,6 +1953,75 @@ static bool d3d12_is_64k_msaa_supported(struct d3d12_device *device) && info.Alignment <= 0x10000; }
+/* A lower value can be signalled on a D3D12 fence. Vulkan timeline semaphores + * do not support this, but test if it works anyway. */ +static bool d3d12_is_timeline_semaphore_supported(const struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; + VkSemaphore timeline_semaphore; + VkSubmitInfo submit_info; + bool result = false; + uint64_t value = 0; + VkQueue vk_queue; + VkResult vr; + + if (!device->vk_info.KHR_timeline_semaphore) + return false; + + if ((vr = vkd3d_create_timeline_semaphore(device, 1, &timeline_semaphore)) < 0) + { + WARN("Failed to create timeline semaphore, vr %d.\n", vr); + return false; + } + + if (!(vk_queue = vkd3d_queue_acquire(device->direct_queue))) + { + ERR("Failed to acquire queue %p.\n", device->direct_queue); + VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL)); + return false; + } + + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = &timeline_submit_info; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = NULL; + submit_info.pWaitDstStageMask = NULL; + submit_info.commandBufferCount = 0; + submit_info.pCommandBuffers = NULL; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &timeline_semaphore; + + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; + timeline_submit_info.pNext = NULL; + timeline_submit_info.pSignalSemaphoreValues = &value; + timeline_submit_info.signalSemaphoreValueCount = 1; + timeline_submit_info.waitSemaphoreValueCount = 0; + timeline_submit_info.pWaitSemaphoreValues = NULL; + + vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE)); + + if (vr >= 0) + { + if ((vr = VK_CALL(vkQueueWaitIdle(vk_queue))) < 0) + WARN("Failed to wait for queue, vr %d.\n", vr); + + if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, timeline_semaphore, &value))) < 0) + ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr); + else if (!(result = !value)) + WARN("Disabling timeline semaphore use due to incompatible behaviour.\n"); + } + else + { + WARN("Failed to submit signal operation, vr %d.\n", vr); + } + + vkd3d_queue_release(device->direct_queue); + VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL)); + + return result; +} + static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info) { @@ -2037,6 +2120,10 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, }
device->feature_options4.MSAA64KBAlignedTextureSupported = d3d12_is_64k_msaa_supported(device); + device->use_timeline_semaphores = d3d12_is_timeline_semaphore_supported(device) + && vkd3d_queue_init_timeline_semaphore(device->direct_queue, device) + && vkd3d_queue_init_timeline_semaphore(device->compute_queue, device) + && vkd3d_queue_init_timeline_semaphore(device->copy_queue, device);
TRACE("Created Vulkan device %p.\n", vk_device);
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 796dfefd..56060b6d 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -59,6 +59,7 @@ #define VKD3D_MAX_SHADER_EXTENSIONS 3u #define VKD3D_MAX_SHADER_STAGES 5u #define VKD3D_MAX_VK_SYNC_OBJECTS 4u +#define VKD3D_MAX_FENCE_WAITING_QUEUES 4u #define VKD3D_MAX_DESCRIPTOR_SETS 64u /* D3D12 binding tier 3 has a limit of 2048 samplers. */ #define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u @@ -125,6 +126,7 @@ struct vkd3d_vulkan_info bool KHR_maintenance3; bool KHR_push_descriptor; bool KHR_sampler_mirror_clamp_to_edge; + bool KHR_timeline_semaphore; /* EXT device extensions */ bool EXT_calibrated_timestamps; bool EXT_conditional_rendering; @@ -150,6 +152,8 @@ struct vkd3d_vulkan_info
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
+ VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties; + unsigned int shader_extension_count; enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS];
@@ -348,6 +352,7 @@ struct vkd3d_fence_worker struct vkd3d_enqueued_fence { VkFence vk_fence; + VkSemaphore vk_semaphore; struct vkd3d_waiting_fence waiting_fence; } *enqueued_fences; size_t enqueued_fences_size; @@ -357,6 +362,12 @@ struct vkd3d_fence_worker size_t vk_fences_size; struct vkd3d_waiting_fence *fences; size_t fences_size; + VkSemaphore *vk_semaphores; + size_t vk_semaphores_size; + uint64_t *semaphore_wait_values; + size_t semaphore_wait_values_size; + + void (*wait_for_gpu_fences)(struct vkd3d_fence_worker *worker);
struct d3d12_device *device; }; @@ -511,6 +522,12 @@ struct vkd3d_signaled_semaphore bool is_acquired; };
+struct vkd3d_pending_fence_wait +{ + const struct vkd3d_queue *queue; + uint64_t pending_value; +}; + /* ID3D12Fence */ struct d3d12_fence { @@ -530,6 +547,11 @@ struct d3d12_fence size_t events_size; size_t event_count;
+ VkSemaphore timeline_semaphore; + uint64_t pending_timeline_value; + struct vkd3d_pending_fence_wait gpu_waits[VKD3D_MAX_FENCE_WAITING_QUEUES]; + unsigned int gpu_wait_count; + struct list semaphores; unsigned int semaphore_count;
@@ -545,6 +567,9 @@ struct d3d12_fence HRESULT d3d12_fence_create(struct d3d12_device *device, uint64_t initial_value, D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence);
+VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value, + VkSemaphore *timeline_semaphore); + /* ID3D12Heap */ struct d3d12_heap { @@ -1284,6 +1309,9 @@ struct vkd3d_queue VkQueueFlags vk_queue_flags; uint32_t timestamp_bits;
+ VkSemaphore wait_completion_semaphore; + uint64_t pending_wait_completion_value; + struct { VkSemaphore vk_semaphore; @@ -1298,6 +1326,7 @@ struct vkd3d_queue VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); +bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue);
@@ -1456,6 +1485,7 @@ struct d3d12_device VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; bool use_vk_heaps; + bool use_timeline_semaphores; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance, diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index 60556735..34e0ab4b 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -195,6 +195,11 @@ VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSupportKHR) /* VK_KHR_push_descriptor */ VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR)
+/* VK_KHR_timeline_semaphore */ +VK_DEVICE_EXT_PFN(vkGetSemaphoreCounterValueKHR) +VK_DEVICE_EXT_PFN(vkWaitSemaphoresKHR) +VK_DEVICE_EXT_PFN(vkSignalSemaphoreKHR) + /* VK_EXT_calibrated_timestamps */ VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 29bd40c8..2059b182 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -33240,7 +33240,9 @@ static void test_queue_wait(void) command_list = context.list; queue = context.queue;
- queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL); + /* 'queue2' must not map to the same command queue as 'queue', or Wait() before GPU signal will fail. + * Using a compute queue fixes this on most hardware, but it may still fail on low spec hardware. */ + queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
event = create_event(); ok(event, "Failed to create event.\n"); @@ -33305,12 +33307,6 @@ static void test_queue_wait(void) check_readback_data_uint(&rb, NULL, 0xff00ff00, 0); release_resource_readback(&rb);
- if (!vkd3d_test_platform_is_windows()) - { - skip("Wait() is not implemented yet.\n"); /* FIXME */ - goto skip_tests; - } - /* Wait() before CPU signal */ update_buffer_data(cb, 0, sizeof(blue), &blue); queue_wait(queue, fence, 2); @@ -33386,7 +33382,6 @@ static void test_queue_wait(void) check_readback_data_uint(&rb, NULL, 0xff00ff00, 0); release_resource_readback(&rb);
-skip_tests: /* Signal() and Wait() in the same command queue */ update_buffer_data(cb, 0, sizeof(blue), &blue); queue_signal(queue, fence, 7);
On 3/24/22 09:56, Henri Verbeet wrote:
> From: Conor McCarthy <cmccarthy@codeweavers.com>
>
> D3D12 supports signalling a fence to a lower value, while Vulkan timeline semaphores do not. On the GPU side this is handled by simply submitting the signal anyway, provided a test for this behaviour passes on device creation, because working around it there is impractical. For CPU signals, the Vulkan semaphore is replaced with a new one at the lower value, but only if no waits and/or signals are pending on the GPU; otherwise a fixme is emitted.
>
> Partly based on a vkd3d-proton patch by Hans-Kristian Arntzen (not including the handling of lower fence values).
>
> The old implementation is used if KHR_timeline_semaphore is not available or GPU signals do not work for a lower value.
This is causing pretty much all vkd3d tests, including shader tests, to hang on my machine (radeonsi, with Mesa 22.0.0), with the following messages:
err:d3d12_command_queue_ExecuteCommandLists: Failed to submit queue(s), vr -4.
d3d12:3729: Test failed: Failed to signal fence, hr 0x80004005.
Perhaps most damningly, immediately returning true from d3d12_is_timeline_semaphore_supported(), without actually performing the invalid signal, allows shader tests to run successfully [although test_gpu_signal_fence() is still broken]. I'll admit, I was doubtful of the prospect of intentionally violating the Vulkan specification...
I don't know all of the concerns here, but from a quick reading of the specification, I wonder if it would be possible to, say, map d3d12 values to Vulkan values, and ensure the latter are monotonically increasing?
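To make that concrete, I mean roughly something like the following (entirely hypothetical, invented names; it glosses over waits for values that haven't been signalled yet, over bounding the table, and over GPU-side signals; ARRAY_SIZE as in vkd3d_common.h):

struct fence_value_map_entry
{
    uint64_t d3d12_value;
    uint64_t vk_value;
};

struct fence_value_map
{
    struct fence_value_map_entry entries[64]; /* arbitrary bound for the sketch */
    size_t count;
    uint64_t next_vk_value;
};

/* Each D3D12 signal gets a fresh, strictly increasing Vulkan payload. */
static uint64_t fence_value_map_signal(struct fence_value_map *map, uint64_t d3d12_value)
{
    uint64_t vk_value = ++map->next_vk_value;

    if (map->count < ARRAY_SIZE(map->entries))
    {
        map->entries[map->count].d3d12_value = d3d12_value;
        map->entries[map->count].vk_value = vk_value;
        ++map->count;
    }
    return vk_value;
}

/* A wait on a D3D12 value becomes a wait on the Vulkan payload of the most
 * recent signal that satisfies it. */
static uint64_t fence_value_map_wait(const struct fence_value_map *map, uint64_t d3d12_value)
{
    size_t i;

    for (i = map->count; i; --i)
    {
        if (map->entries[i - 1].d3d12_value >= d3d12_value)
            return map->entries[i - 1].vk_value;
    }
    return 0; /* no satisfying signal submitted yet; would need separate handling */
}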
Or, if that doesn't work for some reason, could we try to introduce a Vulkan extension that allows properly emulating d3d12 behaviour?