From: Stefan Dösinger <stefan@codeweavers.com>
--- libs/vkd3d/state.c | 153 +++++++++++++++++++++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 62 +++++++++++++++ 2 files changed, 215 insertions(+)
diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c
index f39055f90..eb72464bd 100644
--- a/libs/vkd3d/state.c
+++ b/libs/vkd3d/state.c
@@ -2476,6 +2476,164 @@ static HRESULT d3d12_pipeline_state_find_and_init_uav_counters(struct d3d12_pipe
     return hr;
 }
 
+/* Serialise a D3D12 pipeline state description into one flat shader cache
+ * entry that can be hashed and stored as a single blob.
+ *
+ * Fixed-size state is stored in the structure itself. Variable-length data
+ * is packed into entry->data in this order: CS, VS, PS, DS, HS and GS
+ * bytecode, stream output declarations, stream output buffer strides, input
+ * layout elements.
+ *
+ * The entry is allocated zeroed, so padding and unused semantic name bytes
+ * have a fixed value when the entry is hashed.
+ *
+ * *entry_size receives the size of the entry in bytes. Returns NULL if the
+ * allocation fails. The caller owns the returned entry, releases it with
+ * vkd3d_free(), and is expected to set super.type, which is left at 0. */
+static struct vkd3d_shader_cache_pipeline_state *vkd3d_cache_pipeline_from_d3d(
+        const struct d3d12_pipeline_state_desc *desc,
+        const struct d3d12_root_signature *root_signature, uint32_t *entry_size)
+{
+    struct vkd3d_shader_cache_pipeline_state *entry;
+    uint32_t size, pos = 0, i;
+
+    size = desc->cs.BytecodeLength;
+    size += desc->vs.BytecodeLength;
+    size += desc->ps.BytecodeLength;
+    size += desc->ds.BytecodeLength;
+    size += desc->hs.BytecodeLength;
+    size += desc->gs.BytecodeLength;
+    size += desc->stream_output.NumEntries * sizeof(struct vkd3d_so_declaration_cache_entry);
+    size += desc->stream_output.NumStrides * sizeof(*desc->stream_output.pBufferStrides);
+    /* FIXME: Dynamically handle semantic strings */
+    size += desc->input_layout.NumElements * sizeof(struct vkd3d_input_layout_element_cache);
+
+    *entry_size = offsetof(struct vkd3d_shader_cache_pipeline_state, data[size]);
+    /* vkd3d_calloc() can fail; callers test the result for NULL. */
+    if (!(entry = vkd3d_calloc(1, *entry_size)))
+        return NULL;
+
+    entry->super.vkd3d_revision = VKD3D_SHADER_CACHE_VKD3D_VERSION;
+    entry->super.type = 0;
+
+    entry->root_signature = root_signature->hash;
+
+    entry->cs_size = desc->cs.BytecodeLength;
+    if (entry->cs_size)
+    {
+        memcpy(entry->data + pos, desc->cs.pShaderBytecode, entry->cs_size);
+        pos += entry->cs_size;
+    }
+
+    entry->vs_size = desc->vs.BytecodeLength;
+    if (entry->vs_size)
+    {
+        memcpy(entry->data + pos, desc->vs.pShaderBytecode, entry->vs_size);
+        pos += entry->vs_size;
+    }
+
+    entry->ps_size = desc->ps.BytecodeLength;
+    if (entry->ps_size)
+    {
+        memcpy(entry->data + pos, desc->ps.pShaderBytecode, entry->ps_size);
+        pos += entry->ps_size;
+    }
+
+    entry->ds_size = desc->ds.BytecodeLength;
+    if (entry->ds_size)
+    {
+        memcpy(entry->data + pos, desc->ds.pShaderBytecode, entry->ds_size);
+        pos += entry->ds_size;
+    }
+
+    entry->hs_size = desc->hs.BytecodeLength;
+    if (entry->hs_size)
+    {
+        memcpy(entry->data + pos, desc->hs.pShaderBytecode, entry->hs_size);
+        pos += entry->hs_size;
+    }
+
+    entry->gs_size = desc->gs.BytecodeLength;
+    if (entry->gs_size)
+    {
+        memcpy(entry->data + pos, desc->gs.pShaderBytecode, entry->gs_size);
+        pos += entry->gs_size;
+    }
+
+    entry->so_entries = desc->stream_output.NumEntries;
+    /* Part of the pipeline state, so it has to contribute to the hash. */
+    entry->so_RasterizedStream = desc->stream_output.RasterizedStream;
+    for (i = 0; i < entry->so_entries; ++i)
+    {
+        struct vkd3d_so_declaration_cache_entry e;
+
+        /* entry->data + pos is only byte aligned once arbitrary bytecode
+         * sizes have been added to pos, so fill an aligned local and copy
+         * it into place. Clearing it first keeps padding and the unused
+         * tail of semantic_name zeroed. */
+        memset(&e, 0, sizeof(e));
+        e.stream = desc->stream_output.pSODeclaration[i].Stream;
+        strncpy(e.semantic_name, desc->stream_output.pSODeclaration[i].SemanticName,
+                sizeof(e.semantic_name) - 1);
+        e.semantic_index = desc->stream_output.pSODeclaration[i].SemanticIndex;
+        e.start_component = desc->stream_output.pSODeclaration[i].StartComponent;
+        e.component_count = desc->stream_output.pSODeclaration[i].ComponentCount;
+        e.output_slot = desc->stream_output.pSODeclaration[i].OutputSlot;
+
+        if (strlen(desc->stream_output.pSODeclaration[i].SemanticName) >= sizeof(e.semantic_name))
+            FIXME("Output semantic name too long\n");
+
+        memcpy(entry->data + pos, &e, sizeof(e));
+        pos += sizeof(e);
+    }
+
+    entry->so_strides = desc->stream_output.NumStrides;
+    if (entry->so_strides)
+    {
+        memcpy(entry->data + pos, desc->stream_output.pBufferStrides,
+                sizeof(*desc->stream_output.pBufferStrides) * entry->so_strides);
+        pos += sizeof(*desc->stream_output.pBufferStrides) * entry->so_strides;
+    }
+
+    entry->input_layout_elements = desc->input_layout.NumElements;
+    for (i = 0; i < entry->input_layout_elements; ++i)
+    {
+        struct vkd3d_input_layout_element_cache e;
+
+        /* Same alignment and zeroing considerations as for the stream
+         * output declarations above. */
+        memset(&e, 0, sizeof(e));
+        strncpy(e.semantic_name, desc->input_layout.pInputElementDescs[i].SemanticName,
+                sizeof(e.semantic_name) - 1);
+        e.semantic_index = desc->input_layout.pInputElementDescs[i].SemanticIndex;
+        e.format = desc->input_layout.pInputElementDescs[i].Format;
+        e.input_slot = desc->input_layout.pInputElementDescs[i].InputSlot;
+        e.aligned_byte_offset = desc->input_layout.pInputElementDescs[i].AlignedByteOffset;
+        e.input_slot_class = desc->input_layout.pInputElementDescs[i].InputSlotClass;
+        e.instance_data_step_rate = desc->input_layout.pInputElementDescs[i].InstanceDataStepRate;
+
+        if (strlen(desc->input_layout.pInputElementDescs[i].SemanticName) >= sizeof(e.semantic_name))
+            FIXME("Input semantic name too long\n");
+
+        memcpy(entry->data + pos, &e, sizeof(e));
+        pos += sizeof(e);
+    }
+
+    entry->blend_state = desc->blend_state;
+    entry->sample_mask = desc->sample_mask;
+    entry->rasterizer_state = desc->rasterizer_state;
+    entry->depth_stencil_state = desc->depth_stencil_state;
+    entry->strip_cut_value = desc->strip_cut_value;
+    entry->primitive_topology_type = desc->primitive_topology_type;
+    entry->rtv_formats = desc->rtv_formats;
+    entry->dsv_format = desc->dsv_format;
+    entry->sample_desc = desc->sample_desc;
+    entry->node_mask = desc->node_mask;
+    entry->flags = desc->flags;
+
+    return entry;
+}
+
 static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *state,
         struct d3d12_device *device, const struct d3d12_pipeline_state_desc *desc)
 {
@@ -3038,6 +3165,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT];
     struct vkd3d_shader_descriptor_offset_info offset_info;
     struct vkd3d_shader_parameter ps_shader_parameters[1];
+    struct vkd3d_shader_cache_pipeline_state *cache_entry;
     struct vkd3d_shader_transform_feedback_info xfb_info;
     struct vkd3d_shader_spirv_target_info ps_target_info;
     struct vkd3d_shader_interface_info shader_interface;
@@ -3050,6 +3178,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     const struct vkd3d_format *format;
     unsigned int instance_divisor;
     VkVertexInputRate input_rate;
+    uint32_t cache_entry_size;
     unsigned int i, j;
     size_t rt_count;
     uint32_t mask;
@@ -3555,6 +3684,18 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
     d3d12_device_add_ref(state->device = device);
+ cache_entry = vkd3d_cache_pipeline_from_d3d(desc, root_signature, &cache_entry_size); + if (cache_entry) + { + uint64_t hash; + cache_entry->super.type = SHADER_CACHE_ENTRY_GRAPHICS_STATE; + hash = hash_key(cache_entry, cache_entry_size); + vkd3d_shader_cache_put(device->persistent_cache, &hash, sizeof(hash), + cache_entry, cache_entry_size); + vkd3d_free(cache_entry); + state->state_hash = hash; + } + return S_OK;
fail: @@ -3775,6 +3916,8 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics; VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_info; VkPipelineTessellationStateCreateInfo tessellation_info; + struct vkd3d_graphics_pipeline_key persistent_key = {0}; + struct vkd3d_graphics_pipeline_entry cache_entry = {0}; VkPipelineVertexInputStateCreateInfo input_desc; VkPipelineInputAssemblyStateCreateInfo ia_desc; VkPipelineColorBlendStateCreateInfo blend_desc; @@ -3841,12 +3984,17 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta b->inputRate = graphics->input_rates[binding];
pipeline_key.strides[binding_count] = strides[binding]; + persistent_key.strides[binding] = strides[binding];
++binding_count; }
pipeline_key.dsv_format = dsv_format;
+ persistent_key.state = state->state_hash; + persistent_key.topology = topology; + persistent_key.dsv_format = dsv_format; + if ((vk_pipeline = d3d12_pipeline_state_find_compiled_pipeline(state, &pipeline_key, vk_render_pass))) return vk_pipeline;
@@ -3938,6 +4086,11 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta return VK_NULL_HANDLE; }
+ cache_entry.super.vkd3d_revision = VKD3D_SHADER_CACHE_VKD3D_VERSION; + cache_entry.super.type = SHADER_CACHE_ENTRY_GRAPHICS_PIPELINE; + vkd3d_shader_cache_put(device->persistent_cache, &persistent_key, sizeof(persistent_key), + &cache_entry, sizeof(cache_entry)); + if (d3d12_pipeline_state_put_pipeline_to_cache(state, &pipeline_key, vk_pipeline, pipeline_desc.renderPass)) return vk_pipeline;
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 0705b5e7c..f934be5fb 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -68,6 +68,9 @@ struct vkd3d_render_pass_key
enum vkd3d_shader_cache_entry_type { + SHADER_CACHE_ENTRY_COMPUTE_STATE = VKD3D_MAKE_TAG('C', 'O', 'M', 'P'), + SHADER_CACHE_ENTRY_GRAPHICS_PIPELINE = VKD3D_MAKE_TAG('G', 'F', 'X', 'P'), + SHADER_CACHE_ENTRY_GRAPHICS_STATE = VKD3D_MAKE_TAG('G', 'F', 'X', 'S'), SHADER_CACHE_ENTRY_RENDER_PASS = VKD3D_MAKE_TAG('R', 'P', 'A', 'S'), SHADER_CACHE_ENTRY_ROOT_SIGNATURE = VKD3D_MAKE_TAG('R', 'O', 'O', 'T'), SHADER_CACHE_ENTRY_VULKAN_BLOB = VKD3D_MAKE_TAG('V', 'K', 'P', 'C'), @@ -91,6 +94,64 @@ struct vkd3d_shader_cache_root_signature uint8_t dxbc[1]; };
+struct vkd3d_input_layout_element_cache /* One input layout element, stored by value in a pipeline state entry's data[]. */
+{
+    char semantic_name[32]; /* Not a proper solution: fixed size, longer names are cut to 31 characters plus NUL. */
+    UINT semantic_index;
+    DXGI_FORMAT format;
+    UINT input_slot;
+    UINT aligned_byte_offset;
+    D3D12_INPUT_CLASSIFICATION input_slot_class;
+    UINT instance_data_step_rate;
+};
+
+struct vkd3d_so_declaration_cache_entry /* One stream output declaration, stored by value in a pipeline state entry's data[]. */
+{
+    UINT stream;
+    char semantic_name[32]; /* Not a proper solution: fixed size, longer names are cut to 31 characters plus NUL. */
+    UINT semantic_index;
+    BYTE start_component;
+    BYTE component_count;
+    BYTE output_slot;
+};
+
+struct vkd3d_shader_cache_pipeline_state /* Flattened pipeline state description; the whole entry is hashed and stored in the shader cache. */
+{
+    struct vkd3d_shader_cache_entry super; /* Common entry header (vkd3d_revision, type). */
+    uint64_t root_signature; /* Copied from d3d12_root_signature.hash. */
+    uint32_t cs_size, vs_size, ps_size, ds_size, hs_size, gs_size; /* Bytecode sizes in bytes; bytecode is stored in data[] in this order, 0 means none stored. */
+    uint32_t so_entries, so_strides; /* Counts of stream output declarations and buffer strides stored in data[]. */
+    uint32_t so_RasterizedStream; /* NOTE(review): presumably D3D12_STREAM_OUTPUT_DESC.RasterizedStream - confirm the writer fills it in. */
+    uint32_t input_layout_elements; /* Number of vkd3d_input_layout_element_cache records stored in data[]. */
+    D3D12_BLEND_DESC blend_state;
+    UINT sample_mask;
+    D3D12_RASTERIZER_DESC rasterizer_state;
+    D3D12_DEPTH_STENCIL_DESC1 depth_stencil_state;
+    /* Input layout is appended (stored in data[] after the stream output data) */
+    D3D12_INDEX_BUFFER_STRIP_CUT_VALUE strip_cut_value;
+    D3D12_PRIMITIVE_TOPOLOGY_TYPE primitive_topology_type;
+    struct D3D12_RT_FORMAT_ARRAY rtv_formats;
+    DXGI_FORMAT dsv_format;
+    DXGI_SAMPLE_DESC sample_desc;
+    UINT node_mask;
+    D3D12_PIPELINE_STATE_FLAGS flags;
+    uint8_t data[1]; /* Start of the variable-length payload; entries are sized with offsetof(..., data[size]). */
+};
+
+struct vkd3d_graphics_pipeline_key /* Cache key for one compiled graphics pipeline variant; zero-initialised before being filled in. */
+{
+    uint64_t state; /* d3d12_pipeline_state.state_hash of the owning pipeline state. */
+    D3D12_PRIMITIVE_TOPOLOGY topology;
+    VkFormat dsv_format;
+    uint32_t strides[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; /* Indexed by vertex binding. */
+};
+
+struct vkd3d_graphics_pipeline_entry /* Cache value stored under a vkd3d_graphics_pipeline_key. */
+{
+    struct vkd3d_shader_cache_entry super; /* Currently the only content: revision and entry type. */
+    /* TODO: Translated spir-v code */
+};
+
 /* End shader data structures */
/* FIXME: Better name. */ @@ -1340,6 +1401,7 @@ struct d3d12_pipeline_state struct d3d12_compute_pipeline_state compute; } u; VkPipelineBindPoint vk_bind_point; + uint64_t state_hash;
struct d3d12_pipeline_uav_counter_state uav_counters;