From: Józef Kucia <jkucia@codeweavers.com>
Ideally, we would like to introduce a Vulkan extension that makes vertex
buffer strides and primitive topology dynamic.
Signed-off-by: Józef Kucia <jkucia@codeweavers.com>
---
Such an extension should be quite easy to implement in RADV and Anvil.
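
To make that wish concrete, roughly the following would be enough for vkd3d.
This is purely a hypothetical sketch invented for this mail; neither the
commands nor the dynamic-state enumerants below exist in any Vulkan extension
we know of, and a real extension could of course end up looking quite
different.

#include <vulkan/vulkan.h>

/* Hypothetical placeholders only, not an existing Vulkan API. */
#define VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_XXX          ((VkDynamicState)1000000000)
#define VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_XXX ((VkDynamicState)1000000001)

void vkCmdSetPrimitiveTopologyXXX(VkCommandBuffer commandBuffer,
        VkPrimitiveTopology primitiveTopology);
void vkCmdSetVertexBufferStridesXXX(VkCommandBuffer commandBuffer,
        uint32_t firstBinding, uint32_t bindingCount, const VkDeviceSize *pStrides);

With those two values added to pDynamicStates at pipeline creation and the
two commands recorded at draw time, d3d12_command_list_get_or_create_pipeline()
could compile a single VkPipeline per pipeline state object again, instead of
keying a per-device cache on topology and strides as this patch has to do.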
---
libs/vkd3d/command.c | 53 +++++++---------
libs/vkd3d/device.c | 151 ++++++++++++++++++++++++++++++++++++++++++---
libs/vkd3d/state.c | 12 +++-
libs/vkd3d/vkd3d_private.h | 31 ++++++++--
4 files changed, 201 insertions(+), 46 deletions(-)
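
For reviewers, the lookup/insert flow added to
d3d12_command_list_get_or_create_pipeline() boils down to the self-contained
sketch below. It is only an illustration: a flat array without locking stands
in for the rb-tree guarded by pipeline_cache_mutex, the key layout is
abbreviated, and a plain 64-bit value stands in for VkPipeline. The detail
worth keeping in mind when reviewing is that cache keys are compared with
memcmp(), which is why the real code zeroes the whole vkd3d_pipeline_key with
memset() before filling in the used fields.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define SLOT_COUNT 32 /* D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT */

/* Abbreviated stand-in for struct vkd3d_pipeline_key. */
struct pipeline_key
{
    const void *state;            /* the d3d12_graphics_pipeline_state */
    int topology;                 /* VkPrimitiveTopology */
    uint32_t strides[SLOT_COUNT]; /* per-slot vertex buffer strides */
};

/* Flat, unsynchronised array standing in for the rb-tree protected by
 * pipeline_cache_mutex. */
static struct { struct pipeline_key key; uint64_t pipeline; } cache[64];
static unsigned int cache_count;

static uint64_t find_cached_pipeline(const struct pipeline_key *key)
{
    unsigned int i;

    for (i = 0; i < cache_count; ++i)
    {
        /* Keys are compared as raw memory, which only works because every
         * key is fully zeroed before its fields are written. */
        if (!memcmp(&cache[i].key, key, sizeof(*key)))
            return cache[i].pipeline;
    }
    return 0;
}

static bool put_pipeline_to_cache(const struct pipeline_key *key, uint64_t pipeline)
{
    if (cache_count == sizeof(cache) / sizeof(cache[0]))
        return false;
    cache[cache_count].key = *key;
    cache[cache_count].pipeline = pipeline;
    ++cache_count;
    return true;
}

static uint64_t get_or_create_pipeline(const void *state, int topology,
        const uint32_t *strides, unsigned int stride_count)
{
    struct pipeline_key key;
    uint64_t pipeline;
    unsigned int i;

    memset(&key, 0, sizeof(key));
    key.state = state;
    key.topology = topology;
    for (i = 0; i < stride_count; ++i)
        key.strides[i] = strides[i];

    if ((pipeline = find_cached_pipeline(&key)))
        return pipeline;

    pipeline = 0xc0ffee; /* vkCreateGraphicsPipelines() in the real code */
    if (!put_pipeline_to_cache(&key, pipeline))
        return 0;
    return pipeline;
}

int main(void)
{
    static const int dummy_state;
    static const uint32_t strides[] = {16, 32};
    uint64_t first, second;

    first = get_or_create_pipeline(&dummy_state, 3 /* e.g. triangle list */, strides, 2);
    second = get_or_create_pipeline(&dummy_state, 3, strides, 2);
    return first == second ? 0 : 1; /* the second call must hit the cache */
}
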
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index 2fc564312f8e..5e67e6674e95 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -725,17 +725,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat
return true;
}
-static bool d3d12_command_allocator_add_pipeline(struct d3d12_command_allocator *allocator, VkPipeline pipeline)
-{
- if (!vkd3d_array_reserve((void **)&allocator->pipelines, &allocator->pipelines_size,
- allocator->pipeline_count + 1, sizeof(*allocator->pipelines)))
- return false;
-
- allocator->pipelines[allocator->pipeline_count++] = pipeline;
-
- return true;
-}
-
static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator,
VkDescriptorPool pool)
{
@@ -951,12 +940,6 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato
}
allocator->descriptor_pool_count = 0;
- for (i = 0; i < allocator->pipeline_count; ++i)
- {
- VK_CALL(vkDestroyPipeline(device->vk_device, allocator->pipelines[i], NULL));
- }
- allocator->pipeline_count = 0;
-
for (i = 0; i < allocator->framebuffer_count; ++i)
{
VK_CALL(vkDestroyFramebuffer(device->vk_device, allocator->framebuffers[i], NULL));
@@ -1029,7 +1012,6 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo
vkd3d_free(allocator->views);
vkd3d_free(allocator->descriptor_pools);
vkd3d_free(allocator->free_descriptor_pools);
- vkd3d_free(allocator->pipelines);
vkd3d_free(allocator->framebuffers);
vkd3d_free(allocator->passes);
@@ -1214,10 +1196,6 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
allocator->framebuffers_size = 0;
allocator->framebuffer_count = 0;
- allocator->pipelines = NULL;
- allocator->pipelines_size = 0;
- allocator->pipeline_count = 0;
-
allocator->descriptor_pools = NULL;
allocator->descriptor_pools_size = 0;
allocator->descriptor_pool_count = 0;
@@ -1837,8 +1815,8 @@ static bool d3d12_command_list_update_current_framebuffer(struct d3d12_command_l
return true;
}
-static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_command_list *list,
- const struct d3d12_graphics_pipeline_state *state)
+static VkPipeline d3d12_command_list_get_or_create_pipeline(struct d3d12_command_list *list,
+ struct d3d12_graphics_pipeline_state *state)
{
struct VkVertexInputBindingDescription bindings[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
@@ -1846,7 +1824,8 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma
struct VkPipelineInputAssemblyStateCreateInfo ia_desc;
struct VkPipelineColorBlendStateCreateInfo blend_desc;
struct VkGraphicsPipelineCreateInfo pipeline_desc;
- const struct d3d12_device *device = list->device;
+ struct d3d12_device *device = list->device;
+ struct vkd3d_pipeline_key pipeline_key;
size_t binding_count = 0;
VkPipeline vk_pipeline;
unsigned int i;
@@ -1878,6 +1857,10 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma
.pDynamicStates = dynamic_states,
};
+ memset(&pipeline_key, 0, sizeof(pipeline_key));
+ pipeline_key.state = state;
+ pipeline_key.topology = list->primitive_topology;
+
for (i = 0, mask = 0; i < state->attribute_count; ++i)
{
struct VkVertexInputBindingDescription *b;
@@ -1902,9 +1885,14 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma
if (!b->stride)
FIXME("Invalid stride for input slot %u.\n", binding);
+ pipeline_key.strides[binding_count] = list->strides[binding];
+
++binding_count;
}
+ if ((vk_pipeline = d3d12_device_find_cached_pipeline(device, &pipeline_key)))
+ return vk_pipeline;
+
input_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
input_desc.pNext = NULL;
input_desc.flags = 0;
@@ -1957,13 +1945,14 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma
return VK_NULL_HANDLE;
}
- if (!d3d12_command_allocator_add_pipeline(list->allocator, vk_pipeline))
- {
- WARN("Failed to add pipeline.\n");
- VK_CALL(vkDestroyPipeline(device->vk_device, vk_pipeline, NULL));
- return VK_NULL_HANDLE;
- }
+ if (d3d12_device_put_pipeline_to_cache(device, &pipeline_key, vk_pipeline, &state->compiled_pipelines))
+ return vk_pipeline;
+ /* Other thread compiled the pipeline before us. */
+ VK_CALL(vkDestroyPipeline(device->vk_device, vk_pipeline, NULL));
+ vk_pipeline = d3d12_device_find_cached_pipeline(device, &pipeline_key);
+ if (!vk_pipeline)
+ ERR("Could not get the pipeline compiled by other thread from the cache.\n");
return vk_pipeline;
}
@@ -1981,7 +1970,7 @@ static bool d3d12_command_list_update_current_pipeline(struct d3d12_command_list
return false;
}
- if (!(vk_pipeline = d3d12_command_list_create_graphics_pipeline(list, &list->state->u.graphics)))
+ if (!(vk_pipeline = d3d12_command_list_get_or_create_pipeline(list, &list->state->u.graphics)))
return false;
VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, vk_pipeline));
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index cfb17ea17d99..c1db7d30a9e8 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -1169,11 +1169,116 @@ static HRESULT d3d12_device_create_dummy_sampler(struct d3d12_device *device)
return vkd3d_create_static_sampler(device, &sampler_desc, &device->vk_dummy_sampler);
}
-static void d3d12_device_init_pipeline_cache(struct d3d12_device *device)
+static void destroy_compiled_pipeline(struct vkd3d_compiled_pipeline *pipeline,
+ struct d3d12_device *device)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+
+ VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL));
+ vkd3d_free(pipeline);
+}
+
+static int compare_pipeline_cache_entry(const void *key, const struct rb_entry *entry)
+{
+ const struct vkd3d_compiled_pipeline *compiled_pipeline;
+ const struct vkd3d_pipeline_key *pipeline_key;
+
+ pipeline_key = key;
+ compiled_pipeline = RB_ENTRY_VALUE(entry, const struct vkd3d_compiled_pipeline, entry);
+ return memcmp(&compiled_pipeline->key, pipeline_key, sizeof(*pipeline_key));
+}
+
+static void destroy_pipeline_cache_entry(struct rb_entry *entry, void *context)
+{
+ struct vkd3d_compiled_pipeline *pipeline;
+ struct d3d12_device *device = context;
+
+ pipeline = RB_ENTRY_VALUE(entry, struct vkd3d_compiled_pipeline, entry);
+ destroy_compiled_pipeline(pipeline, device);
+}
+
+VkPipeline d3d12_device_find_cached_pipeline(struct d3d12_device *device,
+ const struct vkd3d_pipeline_key *key)
+{
+ VkPipeline vk_pipeline = VK_NULL_HANDLE;
+ struct rb_entry *entry;
+ int rc;
+
+ if (!(rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+ {
+ if ((entry = rb_get(&device->pipeline_cache, key)))
+ vk_pipeline = RB_ENTRY_VALUE(entry, struct vkd3d_compiled_pipeline, entry)->vk_pipeline;
+ pthread_mutex_unlock(&device->pipeline_cache_mutex);
+ }
+ else
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ }
+
+ return vk_pipeline;
+}
+
+bool d3d12_device_put_pipeline_to_cache(struct d3d12_device *device,
+ const struct vkd3d_pipeline_key *key, VkPipeline vk_pipeline, struct list *list)
+{
+ struct vkd3d_compiled_pipeline *compiled_pipeline;
+ bool ret = true;
+ int rc;
+
+ if (!(compiled_pipeline = vkd3d_malloc(sizeof(*compiled_pipeline))))
+ return false;
+
+ compiled_pipeline->key = *key;
+ compiled_pipeline->vk_pipeline = vk_pipeline;
+
+ if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ vkd3d_free(compiled_pipeline);
+ return false;
+ }
+
+ if (rb_put(&device->pipeline_cache, key, &compiled_pipeline->entry) >= 0)
+ {
+ list_add_tail(list, &compiled_pipeline->list);
+ }
+ else
+ {
+ WARN("Failed to put pipeline to cache.\n");
+ vkd3d_free(compiled_pipeline);
+ ret = false;
+ }
+
+ pthread_mutex_unlock(&device->pipeline_cache_mutex);
+ return ret;
+}
+
+void d3d12_device_destroy_compiled_pipelines(struct d3d12_device *device, struct list *list)
+{
+ struct vkd3d_compiled_pipeline *pipeline, *cursor;
+ int rc;
+
+ if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ return;
+ }
+
+ LIST_FOR_EACH_ENTRY_SAFE(pipeline, cursor, list, struct vkd3d_compiled_pipeline, list)
+ {
+ rb_remove(&device->pipeline_cache, &pipeline->entry);
+ destroy_compiled_pipeline(pipeline, device);
+ }
+
+ pthread_mutex_unlock(&device->pipeline_cache_mutex);
+}
+
+static HRESULT d3d12_device_init_pipeline_cache(struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkPipelineCacheCreateInfo cache_info;
VkResult vr;
+ int rc;
cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
cache_info.pNext = NULL;
@@ -1183,9 +1288,39 @@ static void d3d12_device_init_pipeline_cache(struct d3d12_device *device)
if ((vr = VK_CALL(vkCreatePipelineCache(device->vk_device, &cache_info, NULL,
&device->vk_pipeline_cache))) < 0)
{
- ERR("Failed to create pipeline cache, vr %d.\n", vr);
+ ERR("Failed to create Vulkan pipeline cache, vr %d.\n", vr);
device->vk_pipeline_cache = VK_NULL_HANDLE;
}
+
+ rb_init(&device->pipeline_cache, compare_pipeline_cache_entry);
+
+ if ((rc = pthread_mutex_init(&device->pipeline_cache_mutex, NULL)))
+ {
+ ERR("Failed to initialize mutex, error %d.\n", rc);
+ return E_FAIL;
+ }
+
+ return S_OK;
+}
+
+static void d3d12_device_destroy_pipeline_cache(struct d3d12_device *device)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+ int rc;
+
+ if (device->vk_pipeline_cache)
+ VK_CALL(vkDestroyPipelineCache(device->vk_device, device->vk_pipeline_cache, NULL));
+
+ if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ return;
+ }
+
+ rb_destroy(&device->pipeline_cache, destroy_pipeline_cache_entry, device);
+
+ pthread_mutex_unlock(&device->pipeline_cache_mutex);
+ pthread_mutex_destroy(&device->pipeline_cache_mutex);
}
D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator,
@@ -1357,8 +1492,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface)
vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator);
vkd3d_fence_worker_stop(&device->fence_worker, device);
VK_CALL(vkDestroySampler(device->vk_device, device->vk_dummy_sampler, NULL));
- if (device->vk_pipeline_cache)
- VK_CALL(vkDestroyPipelineCache(device->vk_device, device->vk_pipeline_cache, NULL));
+ d3d12_device_destroy_pipeline_cache(device);
d3d12_device_destroy_vkd3d_queues(device);
VK_CALL(vkDestroyDevice(device->vk_device, NULL));
if (device->parent)
@@ -2310,18 +2444,21 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
goto out_free_vk_resources;
}
- if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device)))
+ if (FAILED(hr = d3d12_device_init_pipeline_cache(device)))
goto out_free_vk_resources;
- vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
+ if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device)))
+ goto out_free_pipeline_cache;
- d3d12_device_init_pipeline_cache(device);
+ vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
if ((device->parent = create_info->parent))
IUnknown_AddRef(device->parent);
return S_OK;
+out_free_pipeline_cache:
+ d3d12_device_destroy_pipeline_cache(device);
out_free_vk_resources:
vk_procs = &device->vk_procs;
VK_CALL(vkDestroySampler(device->vk_device, device->vk_dummy_sampler, NULL));
diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c
index 446e1a65029d..b60cebf26654 100644
--- a/libs/vkd3d/state.c
+++ b/libs/vkd3d/state.c
@@ -1095,11 +1095,15 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState
if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS)
{
- for (i = 0; i < state->u.graphics.stage_count; ++i)
+ struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics;
+
+ for (i = 0; i < graphics->stage_count; ++i)
{
- VK_CALL(vkDestroyShaderModule(device->vk_device, state->u.graphics.stages[i].module, NULL));
+ VK_CALL(vkDestroyShaderModule(device->vk_device, graphics->stages[i].module, NULL));
}
- VK_CALL(vkDestroyRenderPass(device->vk_device, state->u.graphics.render_pass, NULL));
+ VK_CALL(vkDestroyRenderPass(device->vk_device, graphics->render_pass, NULL));
+
+ d3d12_device_destroy_compiled_pipelines(device, &graphics->compiled_pipelines);
}
else if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE)
{
@@ -2185,6 +2189,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
graphics->root_signature = root_signature;
+ list_init(&graphics->compiled_pipelines);
+
state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
state->device = device;
ID3D12Device_AddRef(&device->ID3D12Device_iface);
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 37ad13c132cd..5461cc3d36e1 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -25,6 +25,8 @@
#include "vkd3d_common.h"
#include "vkd3d_memory.h"
+#include "list.h"
+#include "rbtree.h"
#include "vkd3d.h"
#include "vkd3d_shader.h"
@@ -489,6 +491,8 @@ struct d3d12_graphics_pipeline_state
struct VkPipelineDepthStencilStateCreateInfo ds_desc;
const struct d3d12_root_signature *root_signature;
+
+ struct list compiled_pipelines;
};
struct d3d12_compute_pipeline_state
@@ -532,6 +536,21 @@ bool d3d12_pipeline_state_is_render_pass_compatible(const struct d3d12_pipeline_
const struct d3d12_pipeline_state *state_b) DECLSPEC_HIDDEN;
struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12PipelineState *iface) DECLSPEC_HIDDEN;
+struct vkd3d_pipeline_key
+{
+ const struct d3d12_graphics_pipeline_state *state;
+ VkPrimitiveTopology topology;
+ uint32_t strides[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
+};
+
+struct vkd3d_compiled_pipeline
+{
+ struct rb_entry entry;
+ struct vkd3d_pipeline_key key;
+ VkPipeline vk_pipeline;
+ struct list list;
+};
+
struct vkd3d_buffer
{
VkBuffer vk_buffer;
@@ -562,10 +581,6 @@ struct d3d12_command_allocator
size_t framebuffers_size;
size_t framebuffer_count;
- VkPipeline *pipelines;
- size_t pipelines_size;
- size_t pipeline_count;
-
VkDescriptorPool *descriptor_pools;
size_t descriptor_pools_size;
size_t descriptor_pool_count;
@@ -701,6 +716,8 @@ struct d3d12_device
struct vkd3d_gpu_va_allocator gpu_va_allocator;
struct vkd3d_fence_worker fence_worker;
+ pthread_mutex_t pipeline_cache_mutex;
+ struct rb_tree pipeline_cache;
VkPipelineCache vk_pipeline_cache;
/* A sampler used for SpvOpImageFetch. */
@@ -733,6 +750,12 @@ void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason,
const char *message, ...) VKD3D_PRINTF_FUNC(3, 4) DECLSPEC_HIDDEN;
struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) DECLSPEC_HIDDEN;
+void d3d12_device_destroy_compiled_pipelines(struct d3d12_device *device, struct list *list) DECLSPEC_HIDDEN;
+VkPipeline d3d12_device_find_cached_pipeline(struct d3d12_device *device,
+ const struct vkd3d_pipeline_key *key) DECLSPEC_HIDDEN;
+bool d3d12_device_put_pipeline_to_cache(struct d3d12_device *device,
+ const struct vkd3d_pipeline_key *key, VkPipeline vk_pipeline, struct list *list) DECLSPEC_HIDDEN;
+
HRESULT vkd3d_create_buffer(struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
const D3D12_RESOURCE_DESC *desc, VkBuffer *vk_buffer) DECLSPEC_HIDDEN;
--
2.16.4