The last commit fixes glitches in Horizon Zero Dawn with vsync on, for reasons stated in the comment there.
The wined3d implementation already blocks the client's call to `Present()` when `vkAcquireNextImageKHR()` blocks, so is roughly equivalent to this when frame latency and buffer count are equal. It has a frame latency implementation which unblocks after calling `vkQueuePresentKHR()`, rather than waiting for its execution to complete. I think the d3d12 implementation is supposed to do the latter, though using `KHR_present_wait` may be unnecessary overkill. Waiting on queue execution comes with a performance cost either way, so without a compelling reason for a strict implementation, something similar to wined3d makes sense.
Another potential issue is after the call to `ID3D12CommandQueue_Signal()` in `d3d12_swapchain_present()`, the client is free to enqueue more commands when `Present()` returns, so by the time the signal is executed and the swapchain worker blits the image and calls `vkQueuePresentKHR()`, these commands are not necessarily enqueued immediately after the `Signal()`. This may not always be harmless.
-- v2: dxgi: Wait for completion of a pending read of the next buffer before returning from d3d12_swapchain_Present(). dxgi: Wake the condition variable after leaving the critical section in d3d12_swapchain_resize_buffers(). dxgi: Wake the condition variable after leaving the critical section in d3d12_swapchain_present().
From: Conor McCarthy cmccarthy@codeweavers.com
--- dlls/dxgi/swapchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/dxgi/swapchain.c b/dlls/dxgi/swapchain.c index 4797fcecd8d..d68788b8fc5 100644 --- a/dlls/dxgi/swapchain.c +++ b/dlls/dxgi/swapchain.c @@ -1214,7 +1214,7 @@ struct d3d12_swapchain_op { unsigned int sync_interval; VkImage vk_image; - unsigned int frame_number; + uint64_t frame_number; } present; struct {
From: Conor McCarthy cmccarthy@codeweavers.com
Microsoft recommends waking after unlocking for performance reasons. The same reasons may not apply here but it results in a consistent 0.7% framerate lift in Horizon Zero Dawn on RADV. --- dlls/dxgi/swapchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/dxgi/swapchain.c b/dlls/dxgi/swapchain.c index d68788b8fc5..36c4716835c 100644 --- a/dlls/dxgi/swapchain.c +++ b/dlls/dxgi/swapchain.c @@ -2345,8 +2345,8 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
EnterCriticalSection(&swapchain->worker_cs); list_add_tail(&swapchain->worker_ops, &op->entry); - WakeAllConditionVariable(&swapchain->worker_cv); LeaveCriticalSection(&swapchain->worker_cs); + WakeAllConditionVariable(&swapchain->worker_cv);
++swapchain->frame_number; if ((frame_latency_event = swapchain->frame_latency_event))
From: Conor McCarthy cmccarthy@codeweavers.com
--- dlls/dxgi/swapchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/dxgi/swapchain.c b/dlls/dxgi/swapchain.c index 36c4716835c..d9e4f22cdd4 100644 --- a/dlls/dxgi/swapchain.c +++ b/dlls/dxgi/swapchain.c @@ -2630,8 +2630,8 @@ static HRESULT d3d12_swapchain_resize_buffers(struct d3d12_swapchain *swapchain,
EnterCriticalSection(&swapchain->worker_cs); list_add_tail(&swapchain->worker_ops, &op->entry); - WakeAllConditionVariable(&swapchain->worker_cv); LeaveCriticalSection(&swapchain->worker_cs); + WakeAllConditionVariable(&swapchain->worker_cv);
swapchain->current_buffer_index = 0;
From: Conor McCarthy cmccarthy@codeweavers.com
--- dlls/dxgi/swapchain.c | 57 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-)
diff --git a/dlls/dxgi/swapchain.c b/dlls/dxgi/swapchain.c index d9e4f22cdd4..0b3e8a51519 100644 --- a/dlls/dxgi/swapchain.c +++ b/dlls/dxgi/swapchain.c @@ -1170,6 +1170,7 @@ struct d3d12_swapchain VkImage vk_swapchain_images[DXGI_MAX_SWAP_CHAIN_BUFFERS]; VkCommandBuffer vk_cmd_buffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; VkSemaphore vk_semaphores[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + VkFence vk_buffer_fences[DXGI_MAX_SWAP_CHAIN_BUFFERS]; unsigned int vk_swapchain_width; unsigned int vk_swapchain_height; VkPresentModeKHR present_mode; @@ -1215,6 +1216,7 @@ struct d3d12_swapchain_op unsigned int sync_interval; VkImage vk_image; uint64_t frame_number; + unsigned int buffer_index; } present; struct { @@ -2020,6 +2022,18 @@ static ULONG STDMETHODCALLTYPE d3d12_swapchain_AddRef(IDXGISwapChain4 *iface) return refcount; }
+static void d3d12_swapchain_destroy_buffer_fences(struct d3d12_swapchain *swapchain) +{ + const struct dxgi_vk_funcs *vk_funcs = &swapchain->vk_funcs; + size_t i; + + for (i = 0; i < ARRAY_SIZE(swapchain->vk_buffer_fences); ++i) + { + vk_funcs->p_vkDestroyFence(swapchain->vk_device, swapchain->vk_buffer_fences[i], NULL); + swapchain->vk_buffer_fences[i] = VK_NULL_HANDLE; + } +} + static void d3d12_swapchain_destroy(struct d3d12_swapchain *swapchain) { const struct dxgi_vk_funcs *vk_funcs = &swapchain->vk_funcs; @@ -2068,6 +2082,7 @@ static void d3d12_swapchain_destroy(struct d3d12_swapchain *swapchain) if (swapchain->vk_device) { vk_funcs->p_vkDestroyFence(swapchain->vk_device, swapchain->vk_fence, NULL); + d3d12_swapchain_destroy_buffer_fences(swapchain); vk_funcs->p_vkDestroySwapchainKHR(swapchain->vk_device, swapchain->vk_swapchain, NULL); }
@@ -2193,7 +2208,7 @@ static HRESULT d3d12_swapchain_set_sync_interval(struct d3d12_swapchain *swapcha }
static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain, VkImage vk_src_image, - uint64_t frame_number) + uint64_t frame_number, unsigned int buffer_index) { const struct dxgi_vk_funcs *vk_funcs = &swapchain->vk_funcs; VkPresentInfoKHR present_info; @@ -2243,7 +2258,7 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain, submit_info.signalSemaphoreCount = 1; submit_info.pSignalSemaphores = &swapchain->vk_semaphores[swapchain->vk_image_index];
- if ((vr = vk_funcs->p_vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE)) < 0) + if ((vr = vk_funcs->p_vkQueueSubmit(vk_queue, 1, &submit_info, swapchain->vk_buffer_fences[buffer_index])) < 0) { ERR("Failed to blit swapchain buffer, vr %d.\n", vr); vkd3d_release_vk_queue(swapchain->command_queue); @@ -2275,7 +2290,8 @@ static HRESULT d3d12_swapchain_op_present_execute(struct d3d12_swapchain *swapch if (FAILED(hr = d3d12_swapchain_set_sync_interval(swapchain, op->present.sync_interval))) return hr;
- vr = d3d12_swapchain_queue_present(swapchain, op->present.vk_image, op->present.frame_number); + vr = d3d12_swapchain_queue_present(swapchain, op->present.vk_image, op->present.frame_number, + op->present.buffer_index); if (vr == VK_ERROR_OUT_OF_DATE_KHR) { TRACE("Recreating Vulkan swapchain.\n"); @@ -2284,7 +2300,8 @@ static HRESULT d3d12_swapchain_op_present_execute(struct d3d12_swapchain *swapch if (FAILED(hr = d3d12_swapchain_create_vulkan_resources(swapchain))) return hr;
- if ((vr = d3d12_swapchain_queue_present(swapchain, op->present.vk_image, op->present.frame_number)) < 0) + if ((vr = d3d12_swapchain_queue_present(swapchain, op->present.vk_image, op->present.frame_number, + op->present.buffer_index)) < 0) ERR("Failed to present after recreating swapchain, vr %d.\n", vr); }
@@ -2314,8 +2331,12 @@ static HRESULT d3d12_swapchain_op_present_execute(struct d3d12_swapchain *swapch static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain, unsigned int sync_interval, unsigned int flags) { + const struct dxgi_vk_funcs *vk_funcs = &swapchain->vk_funcs; + VkDevice vk_device = swapchain->vk_device; struct d3d12_swapchain_op *op; HANDLE frame_latency_event; + VkFence vk_fence; + VkResult vr; HRESULT hr;
if (sync_interval > 4) @@ -2338,8 +2359,22 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain, return E_OUTOFMEMORY; }
+ if (!(vk_fence = swapchain->vk_buffer_fences[swapchain->current_buffer_index])) + { + VkFenceCreateInfo fence_desc; + + fence_desc.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_desc.pNext = NULL; + fence_desc.flags = 0; + if ((vr = vk_funcs->p_vkCreateFence(vk_device, &fence_desc, NULL, &vk_fence)) < 0) + ERR("Failed to create Vulkan fence, vr %d.\n", vr); + else + swapchain->vk_buffer_fences[swapchain->current_buffer_index] = vk_fence; + } + op->type = D3D12_SWAPCHAIN_OP_PRESENT; op->present.sync_interval = sync_interval; + op->present.buffer_index = swapchain->current_buffer_index; op->present.vk_image = swapchain->vk_images[swapchain->current_buffer_index]; op->present.frame_number = swapchain->frame_number;
@@ -2371,6 +2406,18 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain, }
swapchain->current_buffer_index = (swapchain->current_buffer_index + 1) % swapchain->desc.BufferCount; + + /* Prevent the client from overwriting the new current buffer before a pending blit is done. + * After Present() returns, it is expected the client will render to the next buffer in sequence, + * so Present() is the last place where it is possible to wait for a pending read from that buffer. */ + if ((vk_fence = swapchain->vk_buffer_fences[swapchain->current_buffer_index])) + { + if ((vr = vk_funcs->p_vkWaitForFences(vk_device, 1, &vk_fence, VK_TRUE, UINT64_MAX)) != VK_SUCCESS) + ERR("Failed to wait for fence, vr %d.\n", vr); + if ((vr = vk_funcs->p_vkResetFences(vk_device, 1, &vk_fence)) < 0) + ERR("Failed to reset fence, vr %d.\n", vr); + } + return S_OK; }
@@ -2611,6 +2658,7 @@ static HRESULT d3d12_swapchain_resize_buffers(struct d3d12_swapchain *swapchain, if (desc->Width == new_desc.Width && desc->Height == new_desc.Height && desc->Format == new_desc.Format && desc->BufferCount == new_desc.BufferCount) { + d3d12_swapchain_destroy_buffer_fences(swapchain); swapchain->current_buffer_index = 0; return S_OK; } @@ -2633,6 +2681,7 @@ static HRESULT d3d12_swapchain_resize_buffers(struct d3d12_swapchain *swapchain, LeaveCriticalSection(&swapchain->worker_cs); WakeAllConditionVariable(&swapchain->worker_cv);
+ d3d12_swapchain_destroy_buffer_fences(swapchain); swapchain->current_buffer_index = 0;
d3d12_swapchain_destroy_d3d12_resources(swapchain);