Module: vkd3d Branch: master Commit: 37e76618ca41b83f4691e9c8afafbc6ebef9228a URL: https://gitlab.winehq.org/wine/vkd3d/-/commit/37e76618ca41b83f4691e9c8afafbc6ebef9228a
Author: Conor McCarthy <cmccarthy@codeweavers.com> Date: Sun Jul 30 13:34:09 2023 +1000
vkd3d: Write Vulkan descriptors in a worker thread.
Raises framerate by 5-10% in games which write thousands of descriptors per frame, e.g. Horizon Zero Dawn.
The worker thread is a generic device worker which can also be used for other purposes if the need arises.
---
libs/vkd3d/command.c | 4 ++ libs/vkd3d/device.c | 110 +++++++++++++++++++++++++++++++++++++++++++++ libs/vkd3d/resource.c | 9 ++++ libs/vkd3d/vkd3d_private.h | 10 +++++ 4 files changed, 133 insertions(+)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 15c8317b..549f6a45 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -2644,6 +2644,8 @@ static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+ vkd3d_cond_signal(&list->device->worker_cond); + if (list->current_pipeline != VK_NULL_HANDLE) return true;
@@ -2665,6 +2667,8 @@ static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_lis VkRenderPass vk_render_pass; VkPipeline vk_pipeline;
+ vkd3d_cond_signal(&list->device->worker_cond); + if (list->current_pipeline != VK_NULL_HANDLE) return true;
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 5c801ca4..d6e74d01 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2495,6 +2495,28 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device5 *iface) return refcount; }
+static HRESULT device_worker_stop(struct d3d12_device *device) +{ + HRESULT hr; + + TRACE("device %p.\n", device); + + vkd3d_mutex_lock(&device->worker_mutex); + + device->worker_should_exit = true; + vkd3d_cond_signal(&device->worker_cond); + + vkd3d_mutex_unlock(&device->worker_mutex); + + if (FAILED(hr = vkd3d_join_thread(device->vkd3d_instance, &device->worker_thread))) + return hr; + + vkd3d_mutex_destroy(&device->worker_mutex); + vkd3d_cond_destroy(&device->worker_cond); + + return S_OK; +} + static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device5 *iface) { struct d3d12_device *device = impl_from_ID3D12Device5(iface); @@ -2520,6 +2542,9 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device5 *iface) d3d12_device_destroy_vkd3d_queues(device); vkd3d_desc_object_cache_cleanup(&device->view_desc_cache); vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache); + if (device->use_vk_heaps) + device_worker_stop(device); + vkd3d_free(device->heaps); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -4251,6 +4276,40 @@ struct d3d12_device *unsafe_impl_from_ID3D12Device5(ID3D12Device5 *iface) return impl_from_ID3D12Device5(iface); }
+static void *device_worker_main(void *arg) +{ + struct d3d12_descriptor_heap *heap; + struct d3d12_device *device = arg; + size_t i; + + vkd3d_set_thread_name("device_worker"); + + vkd3d_mutex_lock(&device->worker_mutex); + + while (!device->worker_should_exit) + { + for (i = 0; i < device->heap_count; ++i) + { + /* Descriptor updates are not written to Vulkan descriptor sets until a command list + * is submitted to a queue, while the client is free to write d3d12 descriptors earlier, + * from any thread. This causes a delay right before command list execution, so + * handling these updates in a worker thread can speed up execution significantly. */ + heap = device->heaps[i]; + if (heap->dirty_list_head == UINT_MAX) + continue; + vkd3d_mutex_lock(&heap->vk_sets_mutex); + d3d12_desc_flush_vk_heap_updates_locked(heap, device); + vkd3d_mutex_unlock(&heap->vk_sets_mutex); + } + + vkd3d_cond_wait(&device->worker_cond, &device->worker_mutex); + } + + vkd3d_mutex_unlock(&device->worker_mutex); + + return NULL; +} + static HRESULT d3d12_device_init(struct d3d12_device *device, struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info) { @@ -4270,6 +4329,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
device->vk_device = VK_NULL_HANDLE;
+ device->heaps = NULL; + device->heap_capacity = 0; + device->heap_count = 0; + memset(&device->worker_thread, 0, sizeof(device->worker_thread)); + device->worker_should_exit = false; + vkd3d_mutex_init(&device->worker_mutex); + vkd3d_cond_init(&device->worker_cond); + if (FAILED(hr = vkd3d_create_vk_device(device, create_info))) goto out_free_instance;
@@ -4291,6 +4358,13 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device))) goto out_cleanup_uav_clear_state;
+ if (device->use_vk_heaps && FAILED(hr = vkd3d_create_thread(device->vkd3d_instance, + device_worker_main, device, &device->worker_thread))) + { + WARN("Failed to create worker thread, hr %#x.\n", hr); + goto out_cleanup_descriptor_heap_layouts; + } + vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); vkd3d_time_domains_init(device); @@ -4308,6 +4382,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
return S_OK;
+out_cleanup_descriptor_heap_layouts: + vkd3d_vk_descriptor_heap_layouts_cleanup(device); out_cleanup_uav_clear_state: vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); out_destroy_null_resources: @@ -4361,6 +4437,40 @@ void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, device->removed_reason = reason; }
+HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap) +{ + vkd3d_mutex_lock(&device->worker_mutex); + + if (!vkd3d_array_reserve((void **)&device->heaps, &device->heap_capacity, device->heap_count + 1, + sizeof(*device->heaps))) + { + vkd3d_mutex_unlock(&device->worker_mutex); + return E_OUTOFMEMORY; + } + device->heaps[device->heap_count++] = heap; + + vkd3d_mutex_unlock(&device->worker_mutex); + + return S_OK; +} + +void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap) +{ + size_t i; + + vkd3d_mutex_lock(&device->worker_mutex); + + for (i = 0; i < device->heap_count; ++i) + { + if (device->heaps[i] == heap) + { + device->heaps[i] = device->heaps[--device->heap_count]; + break; + } + } + + vkd3d_mutex_unlock(&device->worker_mutex); +}
#ifdef _WIN32 struct thread_data diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 0c9c911a..609c6710 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -3995,6 +3995,9 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea { struct d3d12_desc *descriptors = (struct d3d12_desc *)heap->descriptors;
+ if (heap->use_vk_heaps) + d3d12_device_remove_descriptor_heap(device, heap); + for (i = 0; i < heap->desc.NumDescriptors; ++i) { d3d12_desc_destroy(&descriptors[i], device); @@ -4318,6 +4321,12 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, dst[i].next = 0; } object->dirty_list_head = UINT_MAX; + + if (object->use_vk_heaps && FAILED(hr = d3d12_device_add_descriptor_heap(device, object))) + { + vkd3d_free(object); + return hr; + } } else { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index e0eb9f3d..025ad534 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1804,6 +1804,14 @@ struct d3d12_device unsigned int vk_pool_count; struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; bool use_vk_heaps; + + struct d3d12_descriptor_heap **heaps; + size_t heap_capacity; + size_t heap_count; + union vkd3d_thread_handle worker_thread; + struct vkd3d_mutex worker_mutex; + struct vkd3d_cond worker_cond; + bool worker_should_exit; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance, @@ -1813,6 +1821,8 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); struct d3d12_device *unsafe_impl_from_ID3D12Device5(ID3D12Device5 *iface); +HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); +void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap);
static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) {