The existing implementation using virtual descriptor heaps, where Vk descriptor sets are created for the bindings in the root descriptor tables, is inefficient when multiple command lists are used with large descriptor heaps. It also cannot support updating a descriptor set after it is bound.
This patch creates Vk sets for each D3D12 heap. Because D3D12 heaps can contain CBV, SRV and UAV descriptors in the same heap, multiple Vk sets are needed for each heap, however the total number of populated descriptors is never more than (heap size + UAV counter count).
A new 'virtual_heaps' config option is introduced to make the old implementation available when needed. It's not always possible to determine if this is necessary when the device is created.
Up to nine Vk descriptor sets may be used. It's theoretically possible to reduce this to eight by placing immutable samplers in the push descriptor set layout, but contradictions in earlier versions of the Vulkan spec made driver support inconsistent. The documentation was corrected in version 1.2.203.
This patch also adds support for UAV counter descriptor arrays. It's not practical to add this in a separate patch due to complications with combining the old UAV counter implementation with the new descriptor heap implementation.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=47713 Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=47154 Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- README | 3 + include/private/vkd3d_common.h | 4 + include/vkd3d_windows.h | 1 + libs/vkd3d/command.c | 151 +++++++++++++++++++++- libs/vkd3d/device.c | 159 ++++++++++++++++++++++- libs/vkd3d/resource.c | 197 +++++++++++++++++++++++++++++ libs/vkd3d/state.c | 222 +++++++++++++++++++++++++++++++-- libs/vkd3d/vkd3d_private.h | 56 +++++++++ tests/d3d12.c | 13 +- 9 files changed, 787 insertions(+), 19 deletions(-)
diff --git a/README b/README index ed53f202..066a24bc 100644 --- a/README +++ b/README @@ -51,6 +51,9 @@ commas or semicolons. even when the output supports colour.
* VKD3D_CONFIG - a list of options that change the behavior of libvkd3d. + * virtual_heaps - Create descriptors for each D3D12 root signature + descriptor range instead of entire descriptor heaps. Useful when push + constant or bound descriptor limits are exceeded. * vk_debug - enables Vulkan debug extensions.
* VKD3D_DEBUG - controls the debug level for log messages produced by diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 1e19758a..90dc3edb 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -201,6 +201,10 @@ static inline LONG InterlockedIncrement(LONG volatile *x) { return __sync_add_and_fetch(x, 1); } +static inline LONG64 InterlockedIncrement64(LONG64 volatile *x) +{ + return __sync_add_and_fetch(x, 1); +} static inline LONG InterlockedAdd(LONG volatile *x, LONG val) { return __sync_add_and_fetch(x, val); diff --git a/include/vkd3d_windows.h b/include/vkd3d_windows.h index c1aa9f27..e2629c2c 100644 --- a/include/vkd3d_windows.h +++ b/include/vkd3d_windows.h @@ -91,6 +91,7 @@ typedef unsigned __int64 UINT64; typedef int64_t DECLSPEC_ALIGN(8) INT64; typedef uint64_t DECLSPEC_ALIGN(8) UINT64; # endif +typedef INT64 LONG64; typedef long LONG_PTR; typedef unsigned long ULONG_PTR;
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 0973b204..eb78b189 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1921,6 +1921,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l bindings->descriptor_set_count = 0; bindings->descriptor_table_dirty_mask = bindings->descriptor_table_active_mask & bindings->root_signature->descriptor_table_mask; bindings->push_descriptor_dirty_mask = bindings->push_descriptor_active_mask & bindings->root_signature->push_descriptor_mask; + bindings->cbv_srv_uav_heap_id = 0; + bindings->sampler_heap_id = 0; }
static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, @@ -3021,6 +3023,146 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis d3d12_command_list_update_uav_counter_descriptors(list, bind_point); }
+static unsigned int d3d12_command_list_bind_descriptor_table(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings, unsigned int index, + struct d3d12_descriptor_heap **cbv_srv_uav_heap, struct d3d12_descriptor_heap **sampler_heap) +{ + struct d3d12_descriptor_heap *heap; + const struct d3d12_desc *desc; + unsigned int offset; + + if (!(desc = bindings->descriptor_tables[index])) + return 0; + + /* AMD, Nvidia and Intel drivers on Windows work if SetDescriptorHeaps() + * is not called, so we bind heaps from the tables instead. No NULL check is + * needed here because it's checked when descriptor tables are set. */ + heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator, desc); + offset = desc - (const struct d3d12_desc *)heap->descriptors; + + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + if (*cbv_srv_uav_heap) + { + if (heap == *cbv_srv_uav_heap) + return offset; + /* This occurs occasionally in Rise of the Tomb Raider apparently due to a race + * condition (one of several), but adding a mutex for table updates has no effect. */ + WARN("List %p uses descriptors from more than one CBV/SRV/UAV heap.\n", list); + } + *cbv_srv_uav_heap = heap; + } + else + { + if (*sampler_heap) + { + if (heap == *sampler_heap) + return offset; + WARN("List %p uses descriptors from more than one sampler heap.\n", list); + } + *sampler_heap = heap; + } + + return offset; +} + +static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings, struct d3d12_descriptor_heap **cbv_srv_uav_heap, + struct d3d12_descriptor_heap **sampler_heap) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + unsigned int offsets[D3D12_MAX_ROOT_COST]; + unsigned int i, j; + + for (i = 0, j = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i) + { + if (!(rs->descriptor_table_mask & ((uint64_t)1 << i))) + continue; + offsets[j++] = d3d12_command_list_bind_descriptor_table(list, bindings, i, + cbv_srv_uav_heap, sampler_heap); + } + if (j) + { + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, rs->vk_pipeline_layout, VK_SHADER_STAGE_ALL, + rs->descriptor_table_offset, j * sizeof(uint32_t), offsets)); + } +} + +static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + enum vkd3d_vk_descriptor_set_index set; + + if (!heap) + return; + + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + if (heap->serial_id == bindings->cbv_srv_uav_heap_id) + return; + bindings->cbv_srv_uav_heap_id = heap->serial_id; + } + else + { + if (heap->serial_id == bindings->sampler_heap_id) + return; + bindings->sampler_heap_id = heap->serial_id; + } + + /* These sets can be shared across multiple command lists, and therefore binding must + * be synchronised. On an experimental branch in which caching of Vk descriptor writes + * greatly increased the chance of multiple threads arriving here at the same time, + * GRID 2019 crashed without the mutex lock. */ + pthread_mutex_lock(&heap->vk_sets_mutex); + + for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) + { + VkDescriptorSet vk_descriptor_set = heap->vk_descriptor_sets[set].vk_set; + + if (!vk_descriptor_set) + continue; + + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout, + rs->vk_set_count + set, 1, &vk_descriptor_set, 0, NULL)); + } + + pthread_mutex_unlock(&heap->vk_sets_mutex); +} + +static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + struct d3d12_descriptor_heap *cbv_srv_uav_heap = NULL, *sampler_heap = NULL; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + + if (!rs) + return; + + if (bindings->descriptor_table_dirty_mask || bindings->push_descriptor_dirty_mask) + d3d12_command_list_prepare_descriptors(list, bind_point); + if (bindings->descriptor_table_dirty_mask) + d3d12_command_list_update_descriptor_tables(list, bindings, &cbv_srv_uav_heap, &sampler_heap); + bindings->descriptor_table_dirty_mask = 0; + + d3d12_command_list_update_push_descriptors(list, bind_point); + + if (bindings->descriptor_set_count) + { + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout, + rs->main_set, bindings->descriptor_set_count, bindings->descriptor_sets, 0, NULL)); + bindings->in_use = true; + } + + d3d12_command_list_bind_descriptor_heap(list, bind_point, cbv_srv_uav_heap); + d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap); +} + static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list); @@ -3028,7 +3170,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l if (!d3d12_command_list_update_compute_pipeline(list)) return false;
- d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE);
return true; } @@ -3045,7 +3187,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list if (!d3d12_command_list_update_current_framebuffer(list)) return false;
- d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); + list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS);
if (list->current_render_pass != VK_NULL_HANDLE) return true; @@ -4113,6 +4255,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12Graphi TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps);
/* Our current implementation does not need this method. + * In Windows it doesn't need to be called at all for correct operation, and + * at least on AMD the wrong heaps can be set here and tests still succeed. * * It could be used to validate descriptor tables but we do not have an * equivalent of the D3D12 Debug Layer. */ @@ -5706,6 +5850,9 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d
list->allocator = allocator;
+ list->update_descriptors = device->use_vk_heaps ? d3d12_command_list_update_heap_descriptors + : d3d12_command_list_update_descriptors; + if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) { list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS].vk_uav_counter_views = NULL; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 212b0b74..5fea705f 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -142,6 +142,112 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_VERTEX_ATTRIBUTE_DIVISOR, EXT_vertex_attribute_divisor), };
+static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *device, unsigned int index) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags_info; + VkDescriptorSetLayoutCreateInfo set_desc; + VkDescriptorBindingFlagsEXT set_flags; + VkDescriptorSetLayoutBinding binding; + VkResult vr; + + binding.binding = 0; + binding.descriptorType = device->vk_descriptor_heap_layouts[index].type; + binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count; + binding.stageFlags = VK_SHADER_STAGE_ALL; + binding.pImmutableSamplers = NULL; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + set_desc.pNext = &flags_info; + set_desc.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT; + set_desc.bindingCount = 1; + set_desc.pBindings = &binding; + + set_flags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT + | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT + | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT; + + flags_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + flags_info.pNext = NULL; + flags_info.bindingCount = 1; + flags_info.pBindingFlags = &set_flags; + + if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_desc, NULL, + &device->vk_descriptor_heap_layouts[index].vk_set_layout))) < 0) + { + WARN("Failed to create Vulkan descriptor set layout, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static void vkd3d_vk_descriptor_heap_layouts_cleanup(struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + enum vkd3d_vk_descriptor_set_index set; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, device->vk_descriptor_heap_layouts[set].vk_set_layout, + NULL)); +} + +static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device) +{ + static const struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] = + { + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, + /* UAV counters */ + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + }; + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; + enum vkd3d_vk_descriptor_set_index set; + HRESULT hr; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + device->vk_descriptor_heap_layouts[set] = vk_descriptor_heap_layouts[set]; + + if (!device->use_vk_heaps) + return S_OK; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + { + switch (device->vk_descriptor_heap_layouts[set].type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + device->vk_descriptor_heap_layouts[set].count = limits->uniform_buffer_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + device->vk_descriptor_heap_layouts[set].count = limits->sampled_image_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + device->vk_descriptor_heap_layouts[set].count = limits->storage_image_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + device->vk_descriptor_heap_layouts[set].count = limits->sampler_max_descriptors; + break; + default: + ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); + break; + } + + if (FAILED(hr = vkd3d_create_vk_descriptor_heap_layout(device, set))) + { + vkd3d_vk_descriptor_heap_layouts_cleanup(device); + return hr; + } + } + + return S_OK; +} + static unsigned int get_spec_version(const VkExtensionProperties *extensions, unsigned int count, const char *extension_name) { @@ -431,6 +537,7 @@ static void vkd3d_init_debug_report(struct vkd3d_instance *instance)
static const struct vkd3d_debug_option vkd3d_config_options[] = { + {"virtual_heaps", VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS}, /* always use virtual descriptor heaps */ {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */ };
@@ -1287,6 +1394,36 @@ static void vkd3d_device_descriptor_limits_init(struct vkd3d_device_descriptor_l limits->sampler_max_descriptors = min(device_limits->maxDescriptorSetSamplers, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); }
+static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_descriptor_limits *limits, + const VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties) +{ + const unsigned int root_provision = D3D12_MAX_ROOT_COST / 2; + unsigned int srv_divisor = 1, uav_divisor = 1; + + /* The total number of populated sampled image or storage image descriptors never exceeds the size of + * one set (or two sets if every UAV has a counter), but the total size of bound layouts will exceed + * device limits if each set size is maxDescriptorSet*, because of the D3D12 buffer + image allowance + * (and UAV counters). Breaking limits for layouts seems to work with RADV and Nvidia drivers at + * least, but let's try to stay within them if limits are high enough. */ + if (properties->maxDescriptorSetUpdateAfterBindSampledImages >= (1u << 21)) + { + srv_divisor = 2; + uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 3 : 2; + } + + limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, + properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision); + limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages, + properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision); + limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, + properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision); + limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages, + properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision); + limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers, + properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision); + limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); +} + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info, struct vkd3d_physical_device_info *physical_device_info, @@ -1514,8 +1651,20 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, features->robustBufferAccess = VK_FALSE; }
- vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->properties2.properties.limits); + /* Select descriptor heap implementation. Forcing virtual heaps may be useful if + * a client allocates descriptor heaps too large for the Vulkan device, or the + * root signature cost exceeds the available push constant size. Virtual heaps + * use only enough descriptors for the descriptor tables of the currently bound + * root signature, and don't require a 32-bit push constant for each table. */ + device->use_vk_heaps = vulkan_info->EXT_descriptor_indexing + && !(device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS); + + if (device->use_vk_heaps) + vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->descriptor_indexing_properties); + else + vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->properties2.properties.limits);
return S_OK; } @@ -2506,6 +2655,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_private_store_destroy(&device->private_store);
vkd3d_cleanup_format_info(device); + vkd3d_vk_descriptor_heap_layouts_cleanup(device); vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); vkd3d_destroy_null_resources(&device->null_resources, device); vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator); @@ -4007,6 +4157,9 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_uav_clear_state_init(&device->uav_clear_state, device))) goto out_destroy_null_resources;
+ if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device))) + goto out_cleanup_uav_clear_state; + vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); @@ -4023,6 +4176,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
return S_OK;
+out_cleanup_uav_clear_state: + vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); out_destroy_null_resources: vkd3d_destroy_null_resources(&device->null_resources, device); out_cleanup_format_info: diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 19a163be..4b979689 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2122,6 +2122,58 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); }
+/* dst and src contain the same data unless another thread overwrites dst. The array index is + * calculated from dst, and src is thread safe. */ +static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set; + struct d3d12_descriptor_heap *descriptor_heap; + const struct vkd3d_vk_device_procs *vk_procs; + + descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, dst); + descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + src->vk_descriptor_type)]; + vk_procs = &device->vk_procs; + + pthread_mutex_lock(&descriptor_heap->vk_sets_mutex); + + descriptor_set->vk_descriptor_write.dstArrayElement = dst + - (const struct d3d12_desc *)descriptor_heap->descriptors; + switch (src->vk_descriptor_type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + descriptor_set->vk_descriptor_write.pBufferInfo = &src->u.vk_cbv_info; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + descriptor_set->vk_image_info.imageView = src->u.view->u.vk_image_view; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->u.vk_buffer_view; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + descriptor_set->vk_image_info.sampler = src->u.view->u.vk_sampler; + break; + default: + ERR("Unhandled descriptor type %#x.\n", src->vk_descriptor_type); + break; + } + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + + if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view->vk_counter_view) + { + descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; + descriptor_set->vk_descriptor_write.dstArrayElement = dst + - (const struct d3d12_desc *)descriptor_heap->descriptors; + descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->vk_counter_view; + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + } + + pthread_mutex_unlock(&descriptor_heap->vk_sets_mutex); +} + void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -2143,6 +2195,9 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr /* Destroy the view after unlocking to reduce wait time. */ if (defunct_view) vkd3d_view_destroy(defunct_view, device); + + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, src, device); }
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) @@ -3419,9 +3474,12 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea
if (!refcount) { + const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_device *device = heap->device; unsigned int i;
+ vk_procs = &device->vk_procs; + vkd3d_private_store_destroy(&heap->private_store);
switch (heap->desc.Type) @@ -3468,6 +3526,9 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea break; }
+ VK_CALL(vkDestroyDescriptorPool(device->vk_device, heap->vk_descriptor_pool, NULL)); + pthread_mutex_destroy(&heap->vk_sets_mutex); + vkd3d_free(heap);
d3d12_device_release(device); @@ -3578,19 +3639,155 @@ static const struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl = d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart, };
+const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[] = +{ + VKD3D_SET_INDEX_SAMPLER, + VKD3D_SET_INDEX_COUNT, + VKD3D_SET_INDEX_SAMPLED_IMAGE, + VKD3D_SET_INDEX_STORAGE_IMAGE, + VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER, + VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER, + VKD3D_SET_INDEX_UNIFORM_BUFFER, +}; + +static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorPoolSize pool_sizes[VKD3D_SET_INDEX_COUNT]; + struct VkDescriptorPoolCreateInfo pool_desc; + VkDevice vk_device = device->vk_device; + enum vkd3d_vk_descriptor_set_index set; + VkResult vr; + + for (set = 0, pool_desc.poolSizeCount = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + { + if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type) + { + pool_sizes[pool_desc.poolSizeCount].type = device->vk_descriptor_heap_layouts[set].type; + pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors; + } + } + + pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_desc.pNext = NULL; + pool_desc.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT; + pool_desc.maxSets = pool_desc.poolSizeCount; + pool_desc.pPoolSizes = pool_sizes; + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &descriptor_heap->vk_descriptor_pool))) < 0) + ERR("Failed to create descriptor pool, vr %d.\n", vr); + + return hresult_from_vk_result(vr); +} + +static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, unsigned int set) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + uint32_t variable_binding_size = descriptor_heap->desc.NumDescriptors; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; + VkDescriptorSetAllocateInfo set_desc; + VkResult vr; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_desc.pNext = &set_size; + set_desc.descriptorPool = descriptor_heap->vk_descriptor_pool; + set_desc.descriptorSetCount = 1; + set_desc.pSetLayouts = &device->vk_descriptor_heap_layouts[set].vk_set_layout; + set_size.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT; + set_size.pNext = NULL; + set_size.descriptorSetCount = 1; + set_size.pDescriptorCounts = &variable_binding_size; + if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) + { + descriptor_set->vk_descriptor_write.dstSet = descriptor_set->vk_set; + return S_OK; + } + + ERR("Failed to allocate descriptor set, vr %d.\n", vr); + return hresult_from_vk_result(vr); +} + +static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + enum vkd3d_vk_descriptor_set_index set; + HRESULT hr; + + descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; + memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); + pthread_mutex_init(&descriptor_heap->vk_sets_mutex, NULL); + + if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) + return S_OK; + + if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_pool(descriptor_heap, device, desc))) + return hr; + + for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) + { + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + + descriptor_set->vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set->vk_descriptor_write.pNext = NULL; + descriptor_set->vk_descriptor_write.dstBinding = 0; + descriptor_set->vk_descriptor_write.descriptorCount = 1; + descriptor_set->vk_descriptor_write.descriptorType = device->vk_descriptor_heap_layouts[set].type; + descriptor_set->vk_descriptor_write.pImageInfo = NULL; + descriptor_set->vk_descriptor_write.pBufferInfo = NULL; + descriptor_set->vk_descriptor_write.pTexelBufferView = NULL; + switch (device->vk_descriptor_heap_layouts[set].type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.imageView = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + break; + default: + ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); + return E_FAIL; + } + if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type + && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set))) + return hr; + } + + return S_OK; +} + static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) { + static LONG64 serial_id; HRESULT hr;
descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl; descriptor_heap->refcount = 1; + descriptor_heap->serial_id = InterlockedIncrement64(&serial_id);
descriptor_heap->desc = *desc;
if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) return hr;
+ d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + d3d12_device_add_ref(descriptor_heap->device = device);
return S_OK; diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index cb7bb650..09c0e341 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -92,6 +92,8 @@ static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signa if (root_signature->descriptor_mapping) vkd3d_free(root_signature->descriptor_mapping); vkd3d_free(root_signature->descriptor_offsets); + vkd3d_free(root_signature->uav_counter_mapping); + vkd3d_free(root_signature->uav_counter_offsets); if (root_signature->root_constants) vkd3d_free(root_signature->root_constants);
@@ -327,6 +329,7 @@ static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutB struct d3d12_root_signature_info { size_t binding_count; + size_t uav_range_count;
size_t root_constant_count; size_t root_descriptor_count; @@ -401,6 +404,7 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig info->binding_count += binding_count; info->uav_count += count * 2u; uav_unbounded_range |= unbounded; + ++info->uav_range_count; break; case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: info->cbv_count += count; @@ -495,6 +499,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat uint32_t *push_constant_range_count) { uint32_t push_constants_offset[D3D12_SHADER_VISIBILITY_PIXEL + 1]; + bool use_vk_heaps = root_signature->device->use_vk_heaps; unsigned int i, j, push_constant_count; uint32_t offset;
@@ -507,7 +512,8 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat continue;
assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); - push_constants[p->ShaderVisibility].stageFlags = stage_flags_from_visibility(p->ShaderVisibility); + push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL + : stage_flags_from_visibility(p->ShaderVisibility); push_constants[p->ShaderVisibility].size += p->u.Constants.Num32BitValues * sizeof(uint32_t); } if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size) @@ -586,6 +592,8 @@ struct vkd3d_descriptor_set_context unsigned int table_index; unsigned int unbounded_offset; unsigned int descriptor_index; + unsigned int uav_counter_index; + unsigned int push_constant_index; uint32_t descriptor_binding; };
@@ -595,6 +603,7 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns
if (set_count > max_count) { + /* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */ ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); return false; } @@ -802,6 +811,122 @@ static void d3d12_root_signature_map_vk_unbounded_binding(struct d3d12_root_sign offset->dynamic_offset_index = ~0u; }
+static unsigned int vk_heap_binding_count_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + unsigned int descriptor_set_size) +{ + unsigned int max_count; + + if (descriptor_set_size <= range->offset) + { + ERR("Descriptor range offset %u exceeds maximum available offset %u.\n", range->offset, descriptor_set_size - 1); + max_count = 0; + } + else + { + max_count = descriptor_set_size - range->offset; + } + + if (range->descriptor_count != UINT_MAX) + { + if (range->descriptor_count > max_count) + ERR("Range size %u exceeds available descriptor count %u.\n", range->descriptor_count, max_count); + return range->descriptor_count; + } + else + { + /* Prefer an unsupported binding count vs a zero count, because shader compilation will fail + * to match a declaration to a zero binding, resulting in failure of pipline state creation. */ + return max_count + !max_count; + } +} + +static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + bool is_buffer, const struct d3d12_root_signature *root_signature, + struct vkd3d_shader_descriptor_binding *binding) +{ + const struct vkd3d_device_descriptor_limits *descriptor_limits = &root_signature->device->vk_info.descriptor_limits; + unsigned int descriptor_set_size; + + switch (range->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + binding->set = is_buffer ? VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER : VKD3D_SET_INDEX_SAMPLED_IMAGE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + binding->set = is_buffer ? VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER : VKD3D_SET_INDEX_STORAGE_IMAGE; + descriptor_set_size = descriptor_limits->storage_image_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + binding->set = VKD3D_SET_INDEX_UNIFORM_BUFFER; + descriptor_set_size = descriptor_limits->uniform_buffer_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + binding->set = VKD3D_SET_INDEX_SAMPLER; + descriptor_set_size = descriptor_limits->sampler_max_descriptors; + break; + default: + FIXME("Unhandled descriptor range type type %#x.\n", range->type); + binding->set = VKD3D_SET_INDEX_SAMPLED_IMAGE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + break; + } + binding->set += root_signature->vk_set_count; + binding->binding = 0; + binding->count = vk_heap_binding_count_from_descriptor_range(range, descriptor_set_size); +} + +static void d3d12_root_signature_map_vk_heap_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, bool buffer_descriptor, + enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_resource_binding *mapping = &root_signature->descriptor_mapping[context->descriptor_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->descriptor_offsets[context->descriptor_index++]; + + mapping->type = range->type; + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + vkd3d_descriptor_heap_binding_from_descriptor_range(range, buffer_descriptor, root_signature, &mapping->binding); + offset->static_offset = range->offset; + offset->dynamic_offset_index = context->push_constant_index; +} + +static void d3d12_root_signature_map_vk_heap_uav_counter(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility, + struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_uav_counter_binding *mapping = &root_signature->uav_counter_mapping[context->uav_counter_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->uav_counter_offsets[context->uav_counter_index++]; + + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->binding.set = root_signature->vk_set_count + VKD3D_SET_INDEX_UAV_COUNTER; + mapping->binding.binding = 0; + mapping->binding.count = vk_heap_binding_count_from_descriptor_range(range, + root_signature->device->vk_info.descriptor_limits.storage_image_max_descriptors); + offset->static_offset = range->offset; + offset->dynamic_offset_index = context->push_constant_index; +} + +static void d3d12_root_signature_map_descriptor_heap_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility, + struct vkd3d_descriptor_set_context *context) +{ + bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + { + d3d12_root_signature_map_vk_heap_binding(root_signature, range, true, shader_visibility, context); + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + d3d12_root_signature_map_vk_heap_uav_counter(root_signature, range, shader_visibility, context); + } + + d3d12_root_signature_map_vk_heap_binding(root_signature, range, is_buffer, shader_visibility, context); +} + static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_root_signature *root_signature, const struct d3d12_root_descriptor_table_range *range, unsigned int descriptor_offset, enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) @@ -868,6 +993,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo struct vkd3d_descriptor_set_context *context) { const struct d3d12_device *device = root_signature->device; + bool use_vk_heaps = root_signature->device->use_vk_heaps; struct d3d12_root_descriptor_table *table; unsigned int i, j, k, range_count; uint32_t vk_binding; @@ -935,6 +1061,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
range = &table->ranges[j];
+ if (use_vk_heaps) + { + /* set, binding and vk_binding_count are not used. */ + range->set = 0; + range->binding = 0; + range->vk_binding_count = 0; + d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context); + continue; + } + range->set = root_signature->vk_set_count - root_signature->main_set;
if (root_signature->use_descriptor_arrays) @@ -1014,6 +1150,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
context->current_binding = cur_binding; } + ++context->push_constant_index; }
return S_OK; @@ -1084,9 +1221,36 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa }
context->current_binding = cur_binding; + if (device->use_vk_heaps) + return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); + return S_OK; }
+static void d3d12_root_signature_init_descriptor_table_push_constants(struct d3d12_root_signature *root_signature, + const struct vkd3d_descriptor_set_context *context) +{ + root_signature->descriptor_table_offset = 0; + if ((root_signature->descriptor_table_count = context->push_constant_index)) + { + VkPushConstantRange *range = &root_signature->push_constant_ranges[D3D12_SHADER_VISIBILITY_ALL]; + + root_signature->descriptor_table_offset = align(range->size, 16); + range->size = root_signature->descriptor_table_offset + + root_signature->descriptor_table_count * sizeof(uint32_t); + + if (range->size > root_signature->device->vk_info.device_limits.maxPushConstantsSize) + FIXME("Push constants size %u exceeds maximum allowed size %u. Try VKD3D_CONFIG=virtual_heaps.\n", + range->size, root_signature->device->vk_info.device_limits.maxPushConstantsSize); + + if (!root_signature->push_constant_range_count) + { + root_signature->push_constant_range_count = 1; + range->stageFlags = VK_SHADER_STAGE_ALL; + } + } +} + static bool vk_binding_uses_partial_binding(const VkDescriptorSetLayoutBinding *binding) { if (binding->descriptorCount == 1) @@ -1194,11 +1358,19 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, VkDescriptorSetLayout *vk_set_layouts) { + const struct d3d12_device *device = root_signature->device; + enum vkd3d_vk_descriptor_set_index set; unsigned int i;
for (i = 0; i < root_signature->vk_set_count; ++i) vk_set_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout;
+ if (device->use_vk_heaps) + { + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + vk_set_layouts[i++] = device->vk_descriptor_heap_layouts[set].vk_set_layout; + } + return i; }
@@ -1210,6 +1382,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa struct vkd3d_descriptor_set_context context; VkDescriptorSetLayoutBinding *binding_desc; struct d3d12_root_signature_info info; + bool use_vk_heaps; unsigned int i; HRESULT hr;
@@ -1226,6 +1399,8 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa root_signature->flags = desc->Flags; root_signature->descriptor_mapping = NULL; root_signature->descriptor_offsets = NULL; + root_signature->uav_counter_mapping = NULL; + root_signature->uav_counter_offsets = NULL; root_signature->static_sampler_count = 0; root_signature->static_samplers = NULL; root_signature->device = device; @@ -1243,9 +1418,13 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa }
root_signature->binding_count = info.binding_count; + root_signature->uav_mapping_count = info.uav_range_count; root_signature->static_sampler_count = desc->NumStaticSamplers; root_signature->root_descriptor_count = info.root_descriptor_count; root_signature->use_descriptor_arrays = device->vk_info.EXT_descriptor_indexing; + root_signature->descriptor_table_count = 0; + + use_vk_heaps = device->use_vk_heaps;
hr = E_OUTOFMEMORY; root_signature->parameter_count = desc->NumParameters; @@ -1255,6 +1434,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa if (!(root_signature->descriptor_mapping = vkd3d_calloc(root_signature->binding_count, sizeof(*root_signature->descriptor_mapping)))) goto fail; + if (use_vk_heaps && (!(root_signature->uav_counter_mapping = vkd3d_calloc(root_signature->uav_mapping_count, + sizeof(*root_signature->uav_counter_mapping))) + || !(root_signature->uav_counter_offsets = vkd3d_calloc(root_signature->uav_mapping_count, + sizeof(*root_signature->uav_counter_offsets))))) + goto fail; if (root_signature->use_descriptor_arrays && !(root_signature->descriptor_offsets = vkd3d_calloc( root_signature->binding_count, sizeof(*root_signature->descriptor_offsets)))) goto fail; @@ -1289,8 +1473,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa goto fail; if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, device, desc, &context))) goto fail; + context.push_constant_index = 0; if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context))) goto fail; + if (use_vk_heaps) + d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context);
if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) goto fail; @@ -1968,6 +2155,9 @@ static HRESULT d3d12_pipeline_state_find_and_init_uav_counters(struct d3d12_pipe HRESULT hr; int ret;
+ if (device->use_vk_heaps) + return S_OK; + shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; shader_info.next = NULL; if ((ret = vkd3d_scan_dxbc(code, &shader_info)) < 0) @@ -2020,10 +2210,10 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; offset_info.next = NULL; - offset_info.descriptor_table_offset = 0; - offset_info.descriptor_table_count = 0; + offset_info.descriptor_table_offset = root_signature->descriptor_table_offset; + offset_info.descriptor_table_count = root_signature->descriptor_table_count; offset_info.binding_offsets = root_signature->descriptor_offsets; - offset_info.uav_counter_offsets = NULL; + offset_info.uav_counter_offsets = root_signature->uav_counter_offsets; vkd3d_prepend_struct(&target_info, &offset_info); }
@@ -2035,8 +2225,16 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st shader_interface.push_constant_buffer_count = root_signature->root_constant_count; shader_interface.combined_samplers = NULL; shader_interface.combined_sampler_count = 0; - shader_interface.uav_counters = state->uav_counters.bindings; - shader_interface.uav_counter_count = state->uav_counters.binding_count; + if (root_signature->uav_counter_mapping) + { + shader_interface.uav_counters = root_signature->uav_counter_mapping; + shader_interface.uav_counter_count = root_signature->uav_mapping_count; + } + else + { + shader_interface.uav_counters = state->uav_counters.bindings; + shader_interface.uav_counter_count = state->uav_counters.binding_count; + }
vk_pipeline_layout = state->uav_counters.vk_pipeline_layout ? state->uav_counters.vk_pipeline_layout : root_signature->vk_pipeline_layout; @@ -2797,10 +2995,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s { offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; offset_info.next = NULL; - offset_info.descriptor_table_offset = 0; - offset_info.descriptor_table_count = 0; + offset_info.descriptor_table_offset = root_signature->descriptor_table_offset; + offset_info.descriptor_table_count = root_signature->descriptor_table_count; offset_info.binding_offsets = root_signature->descriptor_offsets; - offset_info.uav_counter_offsets = NULL; + offset_info.uav_counter_offsets = root_signature->uav_counter_offsets; }
for (i = 0; i < ARRAY_SIZE(shader_stages); ++i) @@ -2842,8 +3040,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s break;
case VK_SHADER_STAGE_FRAGMENT_BIT: - shader_interface.uav_counters = state->uav_counters.bindings; - shader_interface.uav_counter_count = state->uav_counters.binding_count; + shader_interface.uav_counters = root_signature->uav_counter_mapping + ? root_signature->uav_counter_mapping : state->uav_counters.bindings; + shader_interface.uav_counter_count = root_signature->uav_counter_mapping + ? root_signature->uav_mapping_count : state->uav_counters.binding_count; stage_target_info = &ps_target_info; break;
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 5da00685..22533204 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -155,6 +155,7 @@ struct vkd3d_vulkan_info enum vkd3d_config_flags { VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, + VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS = 0x00000002, };
struct vkd3d_instance @@ -640,11 +641,51 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc);
+enum vkd3d_vk_descriptor_set_index +{ + VKD3D_SET_INDEX_UNIFORM_BUFFER = 0, + VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER = 1, + VKD3D_SET_INDEX_SAMPLED_IMAGE = 2, + VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER = 3, + VKD3D_SET_INDEX_STORAGE_IMAGE = 4, + VKD3D_SET_INDEX_SAMPLER = 5, + VKD3D_SET_INDEX_UAV_COUNTER = 6, + VKD3D_SET_INDEX_COUNT = 7 +}; + +extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; + +static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + VkDescriptorType type) +{ + assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); + assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); + + return vk_descriptor_set_index_table[type]; +} + +struct vkd3d_vk_descriptor_heap_layout +{ + VkDescriptorType type; + bool buffer_dimension; + D3D12_DESCRIPTOR_HEAP_TYPE applicable_heap_type; + unsigned int count; + VkDescriptorSetLayout vk_set_layout; +}; + +struct d3d12_descriptor_heap_vk_set +{ + VkDescriptorSet vk_set; + VkDescriptorImageInfo vk_image_info; + VkWriteDescriptorSet vk_descriptor_write; +}; + /* ID3D12DescriptorHeap */ struct d3d12_descriptor_heap { ID3D12DescriptorHeap ID3D12DescriptorHeap_iface; LONG refcount; + uint64_t serial_id;
D3D12_DESCRIPTOR_HEAP_DESC desc;
@@ -652,6 +693,10 @@ struct d3d12_descriptor_heap
struct vkd3d_private_store private_store;
+ VkDescriptorPool vk_descriptor_pool; + struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT]; + pthread_mutex_t vk_sets_mutex; + BYTE descriptors[]; };
@@ -766,8 +811,13 @@ struct d3d12_root_signature D3D12_ROOT_SIGNATURE_FLAGS flags;
unsigned int binding_count; + unsigned int uav_mapping_count; struct vkd3d_shader_resource_binding *descriptor_mapping; struct vkd3d_shader_descriptor_offset *descriptor_offsets; + struct vkd3d_shader_uav_counter_binding *uav_counter_mapping; + struct vkd3d_shader_descriptor_offset *uav_counter_offsets; + unsigned int descriptor_table_offset; + unsigned int descriptor_table_count;
unsigned int root_constant_count; struct vkd3d_shader_push_constant_buffer *root_constants; @@ -986,6 +1036,8 @@ struct vkd3d_pipeline_bindings struct d3d12_desc *descriptor_tables[D3D12_MAX_ROOT_COST]; uint64_t descriptor_table_dirty_mask; uint64_t descriptor_table_active_mask; + uint64_t cbv_srv_uav_heap_id; + uint64_t sampler_heap_id;
VkBufferView *vk_uav_counter_views; size_t vk_uav_counter_views_size; @@ -1047,6 +1099,8 @@ struct d3d12_command_list VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];
+ void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); + struct vkd3d_private_store private_store; };
@@ -1236,6 +1290,8 @@ struct d3d12_device struct vkd3d_uav_clear_state uav_clear_state;
VkDescriptorPoolSize vk_pool_sizes[6]; + struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; + bool use_vk_heaps; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance, diff --git a/tests/d3d12.c b/tests/d3d12.c index eb6217bc..68143828 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -16756,13 +16756,15 @@ static void test_update_descriptor_tables(void) destroy_test_context(&context); }
-/* This cannot be implemented reasonably in Vulkan. Vulkan doesn't allow - * updating descriptor sets after the vkCmdBindDescriptorSets() command - * is recorded. +/* This requires the Vulkan descriptor indexing extension and Vulkan-backed + * descriptor heaps. Vulkan doesn't allow updating descriptor sets after the + * vkCmdBindDescriptorSets() command is recorded unless the update-after-bind + * feature of descriptor indexing is used. */ static void test_update_descriptor_heap_after_closing_command_list(void) { ID3D12Resource *red_texture, *green_texture; + D3D12_RESOURCE_BINDING_TIER binding_tier; ID3D12GraphicsCommandList *command_list; D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; ID3D12DescriptorHeap *cpu_heap, *heap; @@ -16812,6 +16814,8 @@ static void test_update_descriptor_heap_after_closing_command_list(void) command_list = context.list; queue = context.queue;
+ binding_tier = get_resource_binding_tier(context.device); + context.root_signature = create_texture_root_signature(context.device, D3D12_SHADER_VISIBILITY_PIXEL, 0, 0); context.pipeline_state = create_pipeline_state(context.device, @@ -16873,7 +16877,8 @@ static void test_update_descriptor_heap_after_closing_command_list(void) D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); get_texture_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); value = get_readback_uint(&rb, 0, 0, 0); - todo ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value); + todo_if(binding_tier < D3D12_RESOURCE_BINDING_TIER_3) + ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value); release_resource_readback(&rb);
ID3D12DescriptorHeap_Release(cpu_heap);