From: Józef Kucia <jkucia@codeweavers.com>
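Ideally, we would like to introduce a Vulkan extension that makes vertex buffer strides and primitive topology dynamic, so that compiled pipelines would not have to be cached per combination of pipeline state, topology and strides.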
Signed-off-by: Józef Kucia <jkucia@codeweavers.com> ---
Such an extension should be quite easy to implement in RADV and Anvil.
--- libs/vkd3d/command.c | 53 +++++++--------- libs/vkd3d/device.c | 151 ++++++++++++++++++++++++++++++++++++++++++--- libs/vkd3d/state.c | 12 +++- libs/vkd3d/vkd3d_private.h | 31 ++++++++-- 4 files changed, 201 insertions(+), 46 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 2fc564312f8e..5e67e6674e95 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -725,17 +725,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat return true; }
-static bool d3d12_command_allocator_add_pipeline(struct d3d12_command_allocator *allocator, VkPipeline pipeline) -{ - if (!vkd3d_array_reserve((void **)&allocator->pipelines, &allocator->pipelines_size, - allocator->pipeline_count + 1, sizeof(*allocator->pipelines))) - return false; - - allocator->pipelines[allocator->pipeline_count++] = pipeline; - - return true; -} - static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator, VkDescriptorPool pool) { @@ -951,12 +940,6 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato } allocator->descriptor_pool_count = 0;
- for (i = 0; i < allocator->pipeline_count; ++i) - { - VK_CALL(vkDestroyPipeline(device->vk_device, allocator->pipelines[i], NULL)); - } - allocator->pipeline_count = 0; - for (i = 0; i < allocator->framebuffer_count; ++i) { VK_CALL(vkDestroyFramebuffer(device->vk_device, allocator->framebuffers[i], NULL)); @@ -1029,7 +1012,6 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo vkd3d_free(allocator->views); vkd3d_free(allocator->descriptor_pools); vkd3d_free(allocator->free_descriptor_pools); - vkd3d_free(allocator->pipelines); vkd3d_free(allocator->framebuffers); vkd3d_free(allocator->passes);
@@ -1214,10 +1196,6 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo allocator->framebuffers_size = 0; allocator->framebuffer_count = 0;
- allocator->pipelines = NULL; - allocator->pipelines_size = 0; - allocator->pipeline_count = 0; - allocator->descriptor_pools = NULL; allocator->descriptor_pools_size = 0; allocator->descriptor_pool_count = 0; @@ -1837,8 +1815,8 @@ static bool d3d12_command_list_update_current_framebuffer(struct d3d12_command_l return true; }
-static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_command_list *list, - const struct d3d12_graphics_pipeline_state *state) +static VkPipeline d3d12_command_list_get_or_create_pipeline(struct d3d12_command_list *list, + struct d3d12_graphics_pipeline_state *state) { struct VkVertexInputBindingDescription bindings[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; @@ -1846,7 +1824,8 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma struct VkPipelineInputAssemblyStateCreateInfo ia_desc; struct VkPipelineColorBlendStateCreateInfo blend_desc; struct VkGraphicsPipelineCreateInfo pipeline_desc; - const struct d3d12_device *device = list->device; + struct d3d12_device *device = list->device; + struct vkd3d_pipeline_key pipeline_key; size_t binding_count = 0; VkPipeline vk_pipeline; unsigned int i; @@ -1878,6 +1857,10 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma .pDynamicStates = dynamic_states, };
+ memset(&pipeline_key, 0, sizeof(pipeline_key)); + pipeline_key.state = state; + pipeline_key.topology = list->primitive_topology; + for (i = 0, mask = 0; i < state->attribute_count; ++i) { struct VkVertexInputBindingDescription *b; @@ -1902,9 +1885,14 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma if (!b->stride) FIXME("Invalid stride for input slot %u.\n", binding);
+ pipeline_key.strides[binding_count] = list->strides[binding]; + ++binding_count; }
+ if ((vk_pipeline = d3d12_device_find_cached_pipeline(device, &pipeline_key))) + return vk_pipeline; + input_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; input_desc.pNext = NULL; input_desc.flags = 0; @@ -1957,13 +1945,14 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma return VK_NULL_HANDLE; }
- if (!d3d12_command_allocator_add_pipeline(list->allocator, vk_pipeline)) - { - WARN("Failed to add pipeline.\n"); - VK_CALL(vkDestroyPipeline(device->vk_device, vk_pipeline, NULL)); - return VK_NULL_HANDLE; - } + if (d3d12_device_put_pipeline_to_cache(device, &pipeline_key, vk_pipeline, &state->compiled_pipelines)) + return vk_pipeline;
+ /* Other thread compiled the pipeline before us. */ + VK_CALL(vkDestroyPipeline(device->vk_device, vk_pipeline, NULL)); + vk_pipeline = d3d12_device_find_cached_pipeline(device, &pipeline_key); + if (!vk_pipeline) + ERR("Could not get the pipeline compiled by other thread from the cache.\n"); return vk_pipeline; }
@@ -1981,7 +1970,7 @@ static bool d3d12_command_list_update_current_pipeline(struct d3d12_command_list return false; }
- if (!(vk_pipeline = d3d12_command_list_create_graphics_pipeline(list, &list->state->u.graphics))) + if (!(vk_pipeline = d3d12_command_list_get_or_create_pipeline(list, &list->state->u.graphics))) return false;
VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, vk_pipeline)); diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index cfb17ea17d99..c1db7d30a9e8 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1169,11 +1169,116 @@ static HRESULT d3d12_device_create_dummy_sampler(struct d3d12_device *device) return vkd3d_create_static_sampler(device, &sampler_desc, &device->vk_dummy_sampler); }
-static void d3d12_device_init_pipeline_cache(struct d3d12_device *device) +static void destroy_compiled_pipeline(struct vkd3d_compiled_pipeline *pipeline, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL)); + vkd3d_free(pipeline); +} + +static int compare_pipeline_cache_entry(const void *key, const struct rb_entry *entry) +{ + const struct vkd3d_compiled_pipeline *compiled_pipeline; + const struct vkd3d_pipeline_key *pipeline_key; + + pipeline_key = key; + compiled_pipeline = RB_ENTRY_VALUE(entry, const struct vkd3d_compiled_pipeline, entry); + return memcmp(&compiled_pipeline->key, pipeline_key, sizeof(*pipeline_key)); +} + +static void destroy_pipeline_cache_entry(struct rb_entry *entry, void *context) +{ + struct vkd3d_compiled_pipeline *pipeline; + struct d3d12_device *device = context; + + pipeline = RB_ENTRY_VALUE(entry, struct vkd3d_compiled_pipeline, entry); + destroy_compiled_pipeline(pipeline, device); +} + +VkPipeline d3d12_device_find_cached_pipeline(struct d3d12_device *device, + const struct vkd3d_pipeline_key *key) +{ + VkPipeline vk_pipeline = VK_NULL_HANDLE; + struct rb_entry *entry; + int rc; + + if (!(rc = pthread_mutex_lock(&device->pipeline_cache_mutex))) + { + if ((entry = rb_get(&device->pipeline_cache, key))) + vk_pipeline = RB_ENTRY_VALUE(entry, struct vkd3d_compiled_pipeline, entry)->vk_pipeline; + pthread_mutex_unlock(&device->pipeline_cache_mutex); + } + else + { + ERR("Failed to lock mutex, error %d.\n", rc); + } + + return vk_pipeline; +} + +bool d3d12_device_put_pipeline_to_cache(struct d3d12_device *device, + const struct vkd3d_pipeline_key *key, VkPipeline vk_pipeline, struct list *list) +{ + struct vkd3d_compiled_pipeline *compiled_pipeline; + bool ret = true; + int rc; + + if (!(compiled_pipeline = vkd3d_malloc(sizeof(*compiled_pipeline)))) + return false; + + compiled_pipeline->key = *key; + 
compiled_pipeline->vk_pipeline = vk_pipeline; + + if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + vkd3d_free(compiled_pipeline); + return false; + } + + if (rb_put(&device->pipeline_cache, key, &compiled_pipeline->entry) >= 0) + { + list_add_tail(list, &compiled_pipeline->list); + } + else + { + WARN("Failed to put pipeline to cache.\n"); + vkd3d_free(compiled_pipeline); + ret = false; + } + + pthread_mutex_unlock(&device->pipeline_cache_mutex); + return ret; +} + +void d3d12_device_destroy_compiled_pipelines(struct d3d12_device *device, struct list *list) +{ + struct vkd3d_compiled_pipeline *pipeline, *cursor; + int rc; + + if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + LIST_FOR_EACH_ENTRY_SAFE(pipeline, cursor, list, struct vkd3d_compiled_pipeline, list) + { + rb_remove(&device->pipeline_cache, &pipeline->entry); + destroy_compiled_pipeline(pipeline, device); + } + + pthread_mutex_unlock(&device->pipeline_cache_mutex); +} + +static HRESULT d3d12_device_init_pipeline_cache(struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkPipelineCacheCreateInfo cache_info; VkResult vr; + int rc;
cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; cache_info.pNext = NULL; @@ -1183,9 +1288,39 @@ static void d3d12_device_init_pipeline_cache(struct d3d12_device *device) if ((vr = VK_CALL(vkCreatePipelineCache(device->vk_device, &cache_info, NULL, &device->vk_pipeline_cache))) < 0) { - ERR("Failed to create pipeline cache, vr %d.\n", vr); + ERR("Failed to create Vulkan pipeline cache, vr %d.\n", vr); device->vk_pipeline_cache = VK_NULL_HANDLE; } + + rb_init(&device->pipeline_cache, compare_pipeline_cache_entry); + + if ((rc = pthread_mutex_init(&device->pipeline_cache_mutex, NULL))) + { + ERR("Failed to initialize mutex, error %d.\n", rc); + return E_FAIL; + } + + return S_OK; +} + +static void d3d12_device_destroy_pipeline_cache(struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + int rc; + + if (device->vk_pipeline_cache) + VK_CALL(vkDestroyPipelineCache(device->vk_device, device->vk_pipeline_cache, NULL)); + + if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + rb_destroy(&device->pipeline_cache, destroy_pipeline_cache_entry, device); + + pthread_mutex_unlock(&device->pipeline_cache_mutex); + pthread_mutex_destroy(&device->pipeline_cache_mutex); }
D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator, @@ -1357,8 +1492,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator); vkd3d_fence_worker_stop(&device->fence_worker, device); VK_CALL(vkDestroySampler(device->vk_device, device->vk_dummy_sampler, NULL)); - if (device->vk_pipeline_cache) - VK_CALL(vkDestroyPipelineCache(device->vk_device, device->vk_pipeline_cache, NULL)); + d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) @@ -2310,18 +2444,21 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, goto out_free_vk_resources; }
- if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device))) + if (FAILED(hr = d3d12_device_init_pipeline_cache(device))) goto out_free_vk_resources;
- vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); + if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device))) + goto out_free_pipeline_cache;
- d3d12_device_init_pipeline_cache(device); + vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent);
return S_OK;
+out_free_pipeline_cache: + d3d12_device_destroy_pipeline_cache(device); out_free_vk_resources: vk_procs = &device->vk_procs; VK_CALL(vkDestroySampler(device->vk_device, device->vk_dummy_sampler, NULL)); diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 446e1a65029d..b60cebf26654 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1095,11 +1095,15 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState
if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { - for (i = 0; i < state->u.graphics.stage_count; ++i) + struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics; + + for (i = 0; i < graphics->stage_count; ++i) { - VK_CALL(vkDestroyShaderModule(device->vk_device, state->u.graphics.stages[i].module, NULL)); + VK_CALL(vkDestroyShaderModule(device->vk_device, graphics->stages[i].module, NULL)); } - VK_CALL(vkDestroyRenderPass(device->vk_device, state->u.graphics.render_pass, NULL)); + VK_CALL(vkDestroyRenderPass(device->vk_device, graphics->render_pass, NULL)); + + d3d12_device_destroy_compiled_pipelines(device, &graphics->compiled_pipelines); } else if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { @@ -2185,6 +2189,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
graphics->root_signature = root_signature;
+ list_init(&graphics->compiled_pipelines); + state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; state->device = device; ID3D12Device_AddRef(&device->ID3D12Device_iface); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 37ad13c132cd..5461cc3d36e1 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -25,6 +25,8 @@
#include "vkd3d_common.h" #include "vkd3d_memory.h" +#include "list.h" +#include "rbtree.h"
#include "vkd3d.h" #include "vkd3d_shader.h" @@ -489,6 +491,8 @@ struct d3d12_graphics_pipeline_state struct VkPipelineDepthStencilStateCreateInfo ds_desc;
const struct d3d12_root_signature *root_signature; + + struct list compiled_pipelines; };
struct d3d12_compute_pipeline_state @@ -532,6 +536,21 @@ bool d3d12_pipeline_state_is_render_pass_compatible(const struct d3d12_pipeline_ const struct d3d12_pipeline_state *state_b) DECLSPEC_HIDDEN; struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12PipelineState *iface) DECLSPEC_HIDDEN;
+struct vkd3d_pipeline_key +{ + const struct d3d12_graphics_pipeline_state *state; + VkPrimitiveTopology topology; + uint32_t strides[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; +}; + +struct vkd3d_compiled_pipeline +{ + struct rb_entry entry; + struct vkd3d_pipeline_key key; + VkPipeline vk_pipeline; + struct list list; +}; + struct vkd3d_buffer { VkBuffer vk_buffer; @@ -562,10 +581,6 @@ struct d3d12_command_allocator size_t framebuffers_size; size_t framebuffer_count;
- VkPipeline *pipelines; - size_t pipelines_size; - size_t pipeline_count; - VkDescriptorPool *descriptor_pools; size_t descriptor_pools_size; size_t descriptor_pool_count; @@ -701,6 +716,8 @@ struct d3d12_device struct vkd3d_gpu_va_allocator gpu_va_allocator; struct vkd3d_fence_worker fence_worker;
+ pthread_mutex_t pipeline_cache_mutex; + struct rb_tree pipeline_cache; VkPipelineCache vk_pipeline_cache;
/* A sampler used for SpvOpImageFetch. */ @@ -733,6 +750,12 @@ void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) VKD3D_PRINTF_FUNC(3, 4) DECLSPEC_HIDDEN; struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) DECLSPEC_HIDDEN;
+void d3d12_device_destroy_compiled_pipelines(struct d3d12_device *device, struct list *list) DECLSPEC_HIDDEN; +VkPipeline d3d12_device_find_cached_pipeline(struct d3d12_device *device, + const struct vkd3d_pipeline_key *key) DECLSPEC_HIDDEN; +bool d3d12_device_put_pipeline_to_cache(struct d3d12_device *device, + const struct vkd3d_pipeline_key *key, VkPipeline vk_pipeline, struct list *list) DECLSPEC_HIDDEN; + HRESULT vkd3d_create_buffer(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, VkBuffer *vk_buffer) DECLSPEC_HIDDEN;