From: Józef Kucia <[email protected]>
Cache Vulkan render passes instead of creating a separate render pass for every pipeline state object; pipeline states with identical attachment formats, sample counts and depth/stencil usage now share a single render pass.

For example, World of Warcraft creates 28 render passes for its 600+ pipeline state objects. Other games appear to use similarly low numbers of render passes, e.g. 8, 17, 26, 60.
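The cache key contains only what determines render pass compatibility: attachment formats, sample count, and depth/stencil usage. Pipeline states that differ only in shaders or blend state therefore share one pass. A minimal sketch of the lookup pattern, with simplified, hypothetical names (the real interface added by this patch is vkd3d_render_pass_cache_find()):

    #include <string.h>
    #include <vulkan/vulkan.h>

    struct pass_key
    {
        VkFormat vk_formats[9];   /* unused slots stay VK_FORMAT_UNDEFINED */
        unsigned int sample_count;
    };

    struct pass_entry
    {
        struct pass_key key;
        VkRenderPass vk_render_pass;
    };

    /* Linear search over a flat array; the key is compared with a single
     * memcmp(), so padding and unused fields must be zero-initialised. */
    static VkRenderPass pass_cache_find(const struct pass_entry *entries,
            size_t count, const struct pass_key *key)
    {
        size_t i;

        for (i = 0; i < count; ++i)
        {
            if (!memcmp(&entries[i].key, key, sizeof(*key)))
                return entries[i].vk_render_pass;
        }
        return VK_NULL_HANDLE; /* miss: the caller creates and appends */
    }

This is why the patch explicitly zeroes render_pass_key.padding and fills unused vk_formats[] entries with VK_FORMAT_UNDEFINED: two logically equal keys must also be bytewise equal.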
Signed-off-by: Józef Kucia <[email protected]>
---
 libs/vkd3d/device.c        |   6 +-
 libs/vkd3d/state.c         | 260 +++++++++++++++++++++++++++++++------
 libs/vkd3d/vkd3d_private.h |  30 ++++-
 3 files changed, 254 insertions(+), 42 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index 5c0c880d9d25..5364df28bbc9 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -1665,7 +1665,7 @@ static HRESULT d3d12_device_init_pipeline_cache(struct d3d12_device *device)
     VkResult vr;
     int rc;
 
-    if ((rc = pthread_mutex_init(&device->pipeline_cache_mutex, NULL)))
+    if ((rc = pthread_mutex_init(&device->mutex, NULL)))
     {
         ERR("Failed to initialize mutex, error %d.\n", rc);
         return hresult_from_errno(rc);
@@ -1693,7 +1693,7 @@ static void d3d12_device_destroy_pipeline_cache(struct d3d12_device *device)
     if (device->vk_pipeline_cache)
         VK_CALL(vkDestroyPipelineCache(device->vk_device, device->vk_pipeline_cache, NULL));
 
-    pthread_mutex_destroy(&device->pipeline_cache_mutex);
+    pthread_mutex_destroy(&device->mutex);
 }
 
 D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator,
@@ -1866,6 +1866,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface)
         vkd3d_destroy_null_resources(&device->null_resources, device);
         vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator);
+        vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device);
         vkd3d_fence_worker_stop(&device->fence_worker, device);
         d3d12_device_destroy_pipeline_cache(device);
         d3d12_device_destroy_vkd3d_queues(device);
@@ -3030,6 +3031,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
     if (FAILED(hr = vkd3d_init_null_resources(&device->null_resources, device)))
         goto out_stop_fence_worker;
 
+    vkd3d_render_pass_cache_init(&device->render_pass_cache);
     vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
 
     if ((device->parent = create_info->parent))
diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c
index edf3399b1aa8..b23101e868a6 100644
--- a/libs/vkd3d/state.c
+++ b/libs/vkd3d/state.c
@@ -1033,6 +1033,196 @@ HRESULT d3d12_root_signature_create(struct d3d12_device *device,
     return S_OK;
 }
+/* vkd3d_render_pass_cache */
+struct vkd3d_render_pass_entry
+{
+    struct vkd3d_render_pass_key key;
+    VkRenderPass vk_render_pass;
+};
+
+STATIC_ASSERT(sizeof(struct vkd3d_render_pass_key) == 48);
+
+static HRESULT vkd3d_render_pass_cache_create_pass_locked(struct vkd3d_render_pass_cache *cache,
+        struct d3d12_device *device, const struct vkd3d_render_pass_key *key, VkRenderPass *vk_render_pass)
+{
+    VkAttachmentReference attachment_references[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1];
+    VkAttachmentDescription attachments[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1];
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    struct vkd3d_render_pass_entry *entry;
+    unsigned int color_attachment_index;
+    VkSubpassDescription sub_pass_desc;
+    VkRenderPassCreateInfo pass_info;
+    bool have_depth_stencil;
+    unsigned int index;
+    VkResult vr;
+
+    if (!vkd3d_array_reserve((void **)&cache->render_passes, &cache->render_passes_size,
+            cache->render_pass_count + 1, sizeof(*cache->render_passes)))
+    {
+        *vk_render_pass = VK_NULL_HANDLE;
+        return E_OUTOFMEMORY;
+    }
+
+    entry = &cache->render_passes[cache->render_pass_count];
+
+    entry->key = *key;
+
+    have_depth_stencil = key->depth_enable || key->stencil_enable;
+    color_attachment_index = have_depth_stencil ? 1 : 0;
+
+    index = 0;
+    if (have_depth_stencil)
+    {
+        VkImageLayout depth_layout = key->depth_stencil_write
+                ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
+                : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
+
+        attachments[index].flags = 0;
+        attachments[index].format = key->vk_formats[index];
+        attachments[index].samples = key->sample_count;
+
+        if (key->depth_enable)
+        {
+            attachments[index].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+            attachments[index].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+        }
+        else
+        {
+            attachments[index].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+            attachments[index].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+        }
+        if (key->stencil_enable)
+        {
+            attachments[index].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+            attachments[index].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
+        }
+        else
+        {
+            attachments[index].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+            attachments[index].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+        }
+        attachments[index].initialLayout = depth_layout;
+        attachments[index].finalLayout = depth_layout;
+
+        attachment_references[index].attachment = 0;
+        attachment_references[index].layout = depth_layout;
+
+        ++index;
+    }
+
+    assert(index == color_attachment_index);
+    for (; index < key->attachment_count; ++index)
+    {
+        attachments[index].flags = 0;
+        attachments[index].format = key->vk_formats[index];
+        attachments[index].samples = key->sample_count;
+        attachments[index].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+        attachments[index].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+        attachments[index].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+        attachments[index].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+        attachments[index].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+        attachments[index].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+
+        attachment_references[index].attachment = index;
+        attachment_references[index].layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+    }
+
+    sub_pass_desc.flags = 0;
+    sub_pass_desc.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
+    sub_pass_desc.inputAttachmentCount = 0;
+    sub_pass_desc.pInputAttachments = NULL;
+    sub_pass_desc.colorAttachmentCount = key->attachment_count - color_attachment_index;
+    sub_pass_desc.pColorAttachments = &attachment_references[color_attachment_index];
+    sub_pass_desc.pResolveAttachments = NULL;
+    if (have_depth_stencil)
+        sub_pass_desc.pDepthStencilAttachment = &attachment_references[0];
+    else
+        sub_pass_desc.pDepthStencilAttachment = NULL;
+    sub_pass_desc.preserveAttachmentCount = 0;
+    sub_pass_desc.pPreserveAttachments = NULL;
+
+    pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
+    pass_info.pNext = NULL;
+    pass_info.flags = 0;
+    pass_info.attachmentCount = key->attachment_count;
+    pass_info.pAttachments = attachments;
+    pass_info.subpassCount = 1;
+    pass_info.pSubpasses = &sub_pass_desc;
+    pass_info.dependencyCount = 0;
+    pass_info.pDependencies = NULL;
+    if ((vr = VK_CALL(vkCreateRenderPass(device->vk_device, &pass_info, NULL, vk_render_pass))) >= 0)
+    {
+        entry->vk_render_pass = *vk_render_pass;
+        ++cache->render_pass_count;
+    }
+    else
+    {
+        WARN("Failed to create Vulkan render pass, vr %d.\n", vr);
+        *vk_render_pass = VK_NULL_HANDLE;
+    }
+
+    return hresult_from_vk_result(vr);
+}
+
+HRESULT vkd3d_render_pass_cache_find(struct vkd3d_render_pass_cache *cache,
+        struct d3d12_device *device, const struct vkd3d_render_pass_key *key, VkRenderPass *vk_render_pass)
+{
+    bool found = false;
+    HRESULT hr = S_OK;
+    unsigned int i;
+    int rc;
+
+    if ((rc = pthread_mutex_lock(&device->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        *vk_render_pass = VK_NULL_HANDLE;
+        return hresult_from_errno(rc);
+    }
+
+    for (i = 0; i < cache->render_pass_count; ++i)
+    {
+        struct vkd3d_render_pass_entry *current = &cache->render_passes[i];
+
+        if (!memcmp(&current->key, key, sizeof(*key)))
+        {
+            *vk_render_pass = current->vk_render_pass;
+            found = true;
+            break;
+        }
+    }
+
+    if (!found)
+        hr = vkd3d_render_pass_cache_create_pass_locked(cache, device, key, vk_render_pass);
+
+    pthread_mutex_unlock(&device->mutex);
+
+    return hr;
+}
+
+void vkd3d_render_pass_cache_init(struct vkd3d_render_pass_cache *cache)
+{
+    cache->render_passes = NULL;
+    cache->render_pass_count = 0;
+    cache->render_passes_size = 0;
+}
+
+void vkd3d_render_pass_cache_cleanup(struct vkd3d_render_pass_cache *cache,
+        struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    unsigned int i;
+
+    for (i = 0; i < cache->render_pass_count; ++i)
+    {
+        struct vkd3d_render_pass_entry *current = &cache->render_passes[i];
+        VK_CALL(vkDestroyRenderPass(device->vk_device, current->vk_render_pass, NULL));
+    }
+
+    vkd3d_free(cache->render_passes);
+    cache->render_passes = NULL;
+}
+
 struct vkd3d_pipeline_key
 {
     D3D12_PRIMITIVE_TOPOLOGY topology;
@@ -1096,7 +1286,6 @@ static void d3d12_pipeline_state_destroy_graphics(struct d3d12_pipeline_state *s
     {
         VK_CALL(vkDestroyShaderModule(device->vk_device, graphics->stages[i].module, NULL));
     }
-    VK_CALL(vkDestroyRenderPass(device->vk_device, graphics->render_pass, NULL));
 
     LIST_FOR_EACH_ENTRY_SAFE(current, e, &graphics->compiled_pipelines, struct vkd3d_compiled_pipeline, entry)
     {
@@ -1883,17 +2072,15 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     struct vkd3d_shader_interface_info shader_interface;
     const struct d3d12_root_signature *root_signature;
     struct vkd3d_shader_signature input_signature;
+    struct vkd3d_render_pass_key render_pass_key;
     VkShaderStageFlagBits xfb_stage = 0;
     VkSampleCountFlagBits sample_count;
-    VkSubpassDescription sub_pass_desc;
     const struct vkd3d_format *format;
-    VkRenderPassCreateInfo pass_desc;
     unsigned int instance_divisor;
     VkVertexInputRate input_rate;
     unsigned int i, j;
     size_t rt_count;
     uint32_t mask;
-    VkResult vr;
     HRESULT hr;
     int ret;
 
@@ -1983,13 +2170,20 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
 
         if ((ds_desc->DepthEnable && ds_desc->DepthWriteMask)
                 || (ds_desc->StencilEnable && ds_desc->StencilWriteMask))
+        {
             depth_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+            render_pass_key.depth_stencil_write = true;
+        }
         else
+        {
             depth_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
+            render_pass_key.depth_stencil_write = false;
+        }
 
         graphics->attachments[0].flags = 0;
         graphics->attachments[0].format = format->vk_format;
         graphics->attachments[0].samples = sample_count;
+        render_pass_key.depth_enable = desc->DepthStencilState.DepthEnable;
         if (desc->DepthStencilState.DepthEnable)
         {
             graphics->attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
@@ -2000,6 +2194,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
             graphics->attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
             graphics->attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
         }
+        render_pass_key.stencil_enable = desc->DepthStencilState.StencilEnable;
        if (desc->DepthStencilState.StencilEnable)
         {
             graphics->attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
@@ -2017,6 +2212,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
         graphics->attachment_references[0].layout = depth_layout;
         ++graphics->rt_idx;
 
+        render_pass_key.vk_formats[0] = format->vk_format;
+
         if (!desc->PS.pShaderBytecode)
         {
             if (FAILED(hr = create_shader_stage(device, &graphics->stages[graphics->stage_count],
@@ -2026,6 +2223,12 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
             ++graphics->stage_count;
         }
     }
+    else
+    {
+        render_pass_key.depth_enable = false;
+        render_pass_key.stencil_enable = false;
+        render_pass_key.depth_stencil_write = false;
+    }
 
     for (i = 0; i < rt_count; ++i)
     {
@@ -2068,10 +2271,18 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
         graphics->attachment_references[idx].attachment = idx;
         graphics->attachment_references[idx].layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 
+        render_pass_key.vk_formats[idx] = format->vk_format;
+
         blend_attachment_from_d3d12(&graphics->blend_attachments[i], rt_desc);
     }
     graphics->attachment_count = graphics->rt_idx + rt_count;
 
+    render_pass_key.attachment_count = graphics->rt_idx + rt_count;
+    render_pass_key.padding = 0;
+    render_pass_key.sample_count = sample_count;
+    for (i = render_pass_key.attachment_count; i < ARRAY_SIZE(render_pass_key.vk_formats); ++i)
+        render_pass_key.vk_formats[i] = VK_FORMAT_UNDEFINED;
+
     ps_shader_parameters[0].name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT;
     ps_shader_parameters[0].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT;
     ps_shader_parameters[0].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32;
@@ -2329,35 +2540,9 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
         goto fail;
     }
 
-    sub_pass_desc.flags = 0;
-    sub_pass_desc.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
-    sub_pass_desc.inputAttachmentCount = 0;
-    sub_pass_desc.pInputAttachments = NULL;
-    sub_pass_desc.colorAttachmentCount = rt_count;
-    sub_pass_desc.pColorAttachments = &graphics->attachment_references[graphics->rt_idx];
-    sub_pass_desc.pResolveAttachments = NULL;
-    if (graphics->rt_idx)
-        sub_pass_desc.pDepthStencilAttachment = &graphics->attachment_references[0];
-    else
-        sub_pass_desc.pDepthStencilAttachment = NULL;
-    sub_pass_desc.preserveAttachmentCount = 0;
-    sub_pass_desc.pPreserveAttachments = NULL;
-
-    pass_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
-    pass_desc.pNext = NULL;
-    pass_desc.flags = 0;
-    pass_desc.attachmentCount = graphics->attachment_count;
-    pass_desc.pAttachments = graphics->attachments;
-    pass_desc.subpassCount = 1;
-    pass_desc.pSubpasses = &sub_pass_desc;
-    pass_desc.dependencyCount = 0;
-    pass_desc.pDependencies = NULL;
-    if ((vr = VK_CALL(vkCreateRenderPass(device->vk_device, &pass_desc, NULL, &graphics->render_pass))) < 0)
-    {
-        WARN("Failed to create Vulkan render pass, vr %d.\n", vr);
-        hr = hresult_from_vk_result(vr);
+    if (FAILED(hr = vkd3d_render_pass_cache_find(&device->render_pass_cache, device,
+            &render_pass_key, &graphics->render_pass)))
         goto fail;
-    }
 
     rs_desc_from_d3d12(&graphics->rs_desc, &desc->RasterizerState);
     if ((!graphics->attachment_count && !(desc->PS.pShaderBytecode && desc->PS.BytecodeLength))
@@ -2392,10 +2577,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     list_init(&graphics->compiled_pipelines);
 
     if (FAILED(hr = vkd3d_private_store_init(&state->private_store)))
-    {
-        VK_CALL(vkDestroyRenderPass(device->vk_device, graphics->render_pass, NULL));
         goto fail;
-    }
 
     state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
     state->device = device;
@@ -2497,7 +2679,7 @@ static VkPipeline d3d12_pipeline_state_find_compiled_pipeline(const struct d3d12
     struct vkd3d_compiled_pipeline *current;
     int rc;
 
-    if (!(rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+    if (!(rc = pthread_mutex_lock(&device->mutex)))
     {
         LIST_FOR_EACH_ENTRY(current, &graphics->compiled_pipelines, struct vkd3d_compiled_pipeline, entry)
         {
@@ -2507,7 +2689,7 @@ static VkPipeline d3d12_pipeline_state_find_compiled_pipeline(const struct d3d12
                 break;
             }
         }
-        pthread_mutex_unlock(&device->pipeline_cache_mutex);
+        pthread_mutex_unlock(&device->mutex);
     }
     else
     {
@@ -2531,7 +2713,7 @@ static bool d3d12_pipeline_state_put_pipeline_to_cache(struct d3d12_pipeline_sta
     compiled_pipeline->key = *key;
     compiled_pipeline->vk_pipeline = vk_pipeline;
 
-    if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+    if ((rc = pthread_mutex_lock(&device->mutex)))
     {
         ERR("Failed to lock mutex, error %d.\n", rc);
         vkd3d_free(compiled_pipeline);
@@ -2551,7 +2733,7 @@ static bool d3d12_pipeline_state_put_pipeline_to_cache(struct d3d12_pipeline_sta
     if (compiled_pipeline)
         list_add_tail(&graphics->compiled_pipelines, &compiled_pipeline->entry);
 
-    pthread_mutex_unlock(&device->pipeline_cache_mutex);
+    pthread_mutex_unlock(&device->mutex);
 
     return compiled_pipeline;
 }
 
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index ba586b44aab2..950be6e56981 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -184,6 +184,33 @@ void *vkd3d_gpu_va_allocator_dereference(struct vkd3d_gpu_va_allocato
 void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator,
         D3D12_GPU_VIRTUAL_ADDRESS address) DECLSPEC_HIDDEN;
 
+struct vkd3d_render_pass_key
+{
+    unsigned int attachment_count;
+    bool depth_enable;
+    bool stencil_enable;
+    bool depth_stencil_write;
+    bool padding;
+    unsigned int sample_count;
+    VkFormat vk_formats[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1];
+};
+
+struct vkd3d_render_pass_entry;
+
+struct vkd3d_render_pass_cache
+{
+    struct vkd3d_render_pass_entry *render_passes;
+    size_t render_pass_count;
+    size_t render_passes_size;
+};
+
+void vkd3d_render_pass_cache_cleanup(struct vkd3d_render_pass_cache *cache,
+        struct d3d12_device *device) DECLSPEC_HIDDEN;
+HRESULT vkd3d_render_pass_cache_find(struct vkd3d_render_pass_cache *cache,
+        struct d3d12_device *device, const struct vkd3d_render_pass_key *key,
+        VkRenderPass *vk_render_pass) DECLSPEC_HIDDEN;
+void vkd3d_render_pass_cache_init(struct vkd3d_render_pass_cache *cache) DECLSPEC_HIDDEN;
+
 struct vkd3d_private_store
 {
     pthread_mutex_t mutex;
@@ -925,7 +952,8 @@ struct d3d12_device
     struct vkd3d_gpu_va_allocator gpu_va_allocator;
     struct vkd3d_fence_worker fence_worker;
 
-    pthread_mutex_t pipeline_cache_mutex;
+    pthread_mutex_t mutex;
+    struct vkd3d_render_pass_cache render_pass_cache;
     VkPipelineCache vk_pipeline_cache;
 
     VkPhysicalDeviceMemoryProperties memory_properties;
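
For reference, a minimal usage sketch of the new interface (error handling omitted; device is assumed to be an otherwise initialised struct d3d12_device):

    struct vkd3d_render_pass_key key = {0}; /* zeroes padding and unused formats */
    VkRenderPass vk_render_pass;

    vkd3d_render_pass_cache_init(&device->render_pass_cache);

    key.attachment_count = 1;
    key.sample_count = VK_SAMPLE_COUNT_1_BIT;
    key.vk_formats[0] = VK_FORMAT_B8G8R8A8_UNORM;

    /* Equal keys return the same VkRenderPass; a pass is created only on a miss. */
    if (SUCCEEDED(vkd3d_render_pass_cache_find(&device->render_pass_cache,
            device, &key, &vk_render_pass)))
    {
        /* ... use vk_render_pass; the cache retains ownership ... */
    }

    vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device);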