Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 13 ++----------- libs/vkd3d/device.c | 36 ++++++++++++++++++++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 20 ++++++++++++++++++++ 3 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 61e18105..e7375fb8 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1351,15 +1351,6 @@ static bool d3d12_command_allocator_add_transfer_buffer(struct d3d12_command_all static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( struct d3d12_command_allocator *allocator) { - static const VkDescriptorPoolSize pool_sizes[] = - { - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1024}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024}, - {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1024}, - {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1024}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024}, - {VK_DESCRIPTOR_TYPE_SAMPLER, 1024}, - }; struct d3d12_device *device = allocator->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct VkDescriptorPoolCreateInfo pool_desc; @@ -1379,8 +1370,8 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( pool_desc.pNext = NULL; pool_desc.flags = 0; pool_desc.maxSets = 512; - pool_desc.poolSizeCount = ARRAY_SIZE(pool_sizes); - pool_desc.pPoolSizes = pool_sizes; + pool_desc.poolSizeCount = ARRAY_SIZE(device->vk_pool_sizes); + pool_desc.pPoolSizes = device->vk_pool_sizes; if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) { ERR("Failed to create descriptor pool, vr %d.\n", vr); diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 3360fd6e..59fa9af9 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1277,6 +1277,16 @@ static void vkd3d_init_feature_level(struct vkd3d_vulkan_info *vk_info, TRACE("Max feature level: %#x.\n", vk_info->max_feature_level); }
+static void vkd3d_device_descriptor_limits_init(struct vkd3d_device_descriptor_limits *limits, + const VkPhysicalDeviceLimits *device_limits) +{ + limits->uniform_buffer_max_descriptors = device_limits->maxDescriptorSetUniformBuffers; + limits->sampled_image_max_descriptors = device_limits->maxDescriptorSetSampledImages; + limits->storage_buffer_max_descriptors = device_limits->maxDescriptorSetStorageBuffers; + limits->storage_image_max_descriptors = device_limits->maxDescriptorSetStorageImages; + limits->sampler_max_descriptors = min(device_limits->maxDescriptorSetSamplers, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); +} + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info, struct vkd3d_physical_device_info *physical_device_info, @@ -1504,6 +1514,9 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, features->robustBufferAccess = VK_FALSE; }
+ vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->properties2.properties.limits); + return S_OK; }
@@ -2419,6 +2432,27 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) WARN("Found no acceptable host time domain. Calibrated timestamps will not be available.\n"); }
+static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, + const struct vkd3d_device_descriptor_limits *limits) +{ + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + pool_sizes[1].descriptorCount = min(limits->sampled_image_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + pool_sizes[2].descriptorCount = pool_sizes[1].descriptorCount; + pool_sizes[3].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + pool_sizes[3].descriptorCount = min(limits->storage_image_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + pool_sizes[4].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + pool_sizes[4].descriptorCount = pool_sizes[3].descriptorCount; + pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; + pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +}; + /* ID3D12Device */ static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) { @@ -3979,6 +4013,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) vkd3d_mutex_init(&device->desc_mutex[i]);
+ vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits); + if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent);
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 67989c11..ca7a3f22 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -62,6 +62,12 @@ #define VKD3D_MAX_SHADER_STAGES 5u #define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_DESCRIPTOR_SETS 64u +/* D3D12 binding tier 3 has a limit of 2048 samplers. */ +#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u +/* The main limitation here is the simple descriptor pool recycling scheme + * requiring each pool to contain all descriptor types used by vkd3d. Limit + * this number to prevent excessive pool memory use. */ +#define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u)
struct d3d12_command_list; struct d3d12_device; @@ -95,6 +101,15 @@ HRESULT hresult_from_errno(int rc); HRESULT hresult_from_vk_result(VkResult vr); HRESULT hresult_from_vkd3d_result(int vkd3d_result);
+struct vkd3d_device_descriptor_limits +{ + unsigned int uniform_buffer_max_descriptors; + unsigned int sampled_image_max_descriptors; + unsigned int storage_buffer_max_descriptors; + unsigned int storage_image_max_descriptors; + unsigned int sampler_max_descriptors; +}; + struct vkd3d_vulkan_info { /* KHR instance extensions */ @@ -130,6 +145,7 @@ struct vkd3d_vulkan_info
VkPhysicalDeviceLimits device_limits; VkPhysicalDeviceSparseProperties sparse_properties; + struct vkd3d_device_descriptor_limits descriptor_limits;
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
@@ -1299,6 +1315,8 @@ struct vkd3d_uav_clear_state HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
+#define VKD3D_DESCRIPTOR_POOL_COUNT 6 + /* ID3D12Device */ struct d3d12_device { @@ -1352,6 +1370,8 @@ struct d3d12_device const struct vkd3d_format_compatibility_list *format_compatibility_lists; struct vkd3d_null_resources null_resources; struct vkd3d_uav_clear_state uav_clear_state; + + VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance,
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/state.c | 81 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 1d4f91e3..91f6e27b 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -331,14 +331,25 @@ struct d3d12_root_signature_info size_t root_constant_count; size_t root_descriptor_count;
+ unsigned int cbv_count; + unsigned int srv_count; + unsigned int uav_count; + unsigned int sampler_count; + unsigned int cbv_unbounded_range_count; + unsigned int srv_unbounded_range_count; + unsigned int uav_unbounded_range_count; + unsigned int sampler_unbounded_range_count; + size_t cost; };
static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info, const D3D12_ROOT_DESCRIPTOR_TABLE *table, bool use_array) { + bool cbv_unbounded_range = false, srv_unbounded_range = false, uav_unbounded_range = false; + bool sampler_unbounded_range = false; bool unbounded = false; - unsigned int i; + unsigned int i, count;
for (i = 0; i < table->NumDescriptorRanges; ++i) { @@ -365,8 +376,12 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig return E_INVALIDARG; }
+ count = range->NumDescriptors; if (range->NumDescriptors == UINT_MAX) + { unbounded = true; + count = 0; + }
binding_count = use_array ? 1 : range->NumDescriptors;
@@ -378,14 +393,22 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig * root signature, we create descriptor set layouts with two bindings for * each SRV and UAV. */ info->binding_count += binding_count; + info->srv_count += count * 2u; + srv_unbounded_range |= unbounded; break; case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: /* As above. */ info->binding_count += binding_count; + info->uav_count += count * 2u; + uav_unbounded_range |= unbounded; break; case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + info->cbv_count += count; + cbv_unbounded_range |= unbounded; break; case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + info->sampler_count += count; + sampler_unbounded_range |= unbounded; break; default: FIXME("Unhandled descriptor type %#x.\n", range->RangeType); @@ -401,6 +424,11 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig return E_FAIL; }
+ info->srv_unbounded_range_count += srv_unbounded_range * 2u; + info->uav_unbounded_range_count += uav_unbounded_range * 2u; + info->cbv_unbounded_range_count += cbv_unbounded_range; + info->sampler_unbounded_range_count += sampler_unbounded_range; + return S_OK; }
@@ -427,16 +455,19 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
case D3D12_ROOT_PARAMETER_TYPE_CBV: ++info->root_descriptor_count; + ++info->cbv_count; ++info->binding_count; info->cost += 2; break; case D3D12_ROOT_PARAMETER_TYPE_SRV: ++info->root_descriptor_count; + ++info->srv_count; ++info->binding_count; info->cost += 2; break; case D3D12_ROOT_PARAMETER_TYPE_UAV: ++info->root_descriptor_count; + ++info->uav_count; ++info->binding_count; info->cost += 2; break; @@ -453,6 +484,7 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i }
info->binding_count += desc->NumStaticSamplers; + info->sampler_count += desc->NumStaticSamplers;
return S_OK; } @@ -675,14 +707,44 @@ static uint32_t vkd3d_descriptor_magic_from_d3d12(D3D12_DESCRIPTOR_RANGE_TYPE ty } }
-static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range) +static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + const struct d3d12_root_signature_info *info, const struct vkd3d_device_descriptor_limits *limits) { + unsigned int count, limit; + if (range->descriptor_count != UINT_MAX) return range->descriptor_count;
- /* TODO: Calculate an upper bound from unbounded set counts and Vulkan - * device limits. */ - return 1024; + switch (range->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + limit = limits->uniform_buffer_max_descriptors; + count = (limit - min(info->cbv_count, limit)) / info->cbv_unbounded_range_count; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + limit = limits->sampled_image_max_descriptors; + count = (limit - min(info->srv_count, limit)) / info->srv_unbounded_range_count; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + limit = limits->storage_image_max_descriptors; + count = (limit - min(info->uav_count, limit)) / info->uav_unbounded_range_count; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + limit = limits->sampler_max_descriptors; + count = (limit - min(info->sampler_count, limit)) / info->sampler_unbounded_range_count; + break; + default: + ERR("Unhandled type %#x.\n", range->type); + return 1; + } + + if (!count) + { + WARN("Descriptor table exceeds type %#x limit of %u.\n", range->type, limit); + count = 1; + } + + return min(count, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); }
static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_root_signature *root_signature, @@ -802,8 +864,10 @@ static HRESULT validate_descriptor_register_ranges(const struct d3d12_root_descr }
static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) + const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, + struct vkd3d_descriptor_set_context *context) { + const struct d3d12_device *device = root_signature->device; struct d3d12_root_descriptor_table *table; unsigned int i, j, k, range_count; uint32_t vk_binding; @@ -904,7 +968,8 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo }
range->binding = context->descriptor_binding; - range->vk_binding_count = vk_binding_count_from_descriptor_range(range); + range->vk_binding_count = vk_binding_count_from_descriptor_range(range, + info, &device->vk_info.descriptor_limits);
if (FAILED(hr = d3d12_root_signature_init_descriptor_array_binding(root_signature, range, p->ShaderVisibility, context))) @@ -1224,7 +1289,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa goto fail; if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, device, desc, &context))) goto fail; - if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &context))) + if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context))) goto fail;
if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0)))
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
The existing implementation using virtual descriptor heaps, where Vk descriptor sets are created for the bindings in the root descriptor tables, is inefficient when multiple command lists are used with large descriptor heaps. It also cannot support updating a descriptor set after it is bound.
This patch creates Vk sets for each D3D12 heap. Because D3D12 heaps can contain CBV, SRV and UAV descriptors in the same heap, multiple Vk sets are needed for each heap; however, the total number of populated descriptors is never more than (heap size + UAV counter count).
A new 'virtual_heaps' config option is introduced to make the old implementation available when needed. At device creation time it is not always possible to determine whether this is necessary.
Up to nine Vk descriptor sets may be used. It's theoretically possible to reduce this to eight by placing immutable samplers in the push descriptor set layout, but contradictions in earlier versions of the Vulkan spec made driver support inconsistent. The documentation was corrected in version 1.2.203.
This patch also adds support for UAV counter descriptor arrays. It's not practical to add this in a separate patch due to complications with combining the old UAV counter implementation with the new descriptor heap implementation.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=47713 Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=47154 Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- README | 3 + include/private/vkd3d_common.h | 4 + include/vkd3d_windows.h | 1 + libs/vkd3d/command.c | 151 +++++++++++++++++++++- libs/vkd3d/device.c | 159 ++++++++++++++++++++++- libs/vkd3d/resource.c | 197 +++++++++++++++++++++++++++++ libs/vkd3d/state.c | 222 +++++++++++++++++++++++++++++++-- libs/vkd3d/vkd3d_private.h | 56 +++++++++ tests/d3d12.c | 13 +- 9 files changed, 787 insertions(+), 19 deletions(-)
diff --git a/README b/README index ed53f202..066a24bc 100644 --- a/README +++ b/README @@ -51,6 +51,9 @@ commas or semicolons. even when the output supports colour.
* VKD3D_CONFIG - a list of options that change the behavior of libvkd3d. + * virtual_heaps - Create descriptors for each D3D12 root signature + descriptor range instead of entire descriptor heaps. Useful when push + constant or bound descriptor limits are exceeded. * vk_debug - enables Vulkan debug extensions.
* VKD3D_DEBUG - controls the debug level for log messages produced by diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 1e19758a..90dc3edb 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -201,6 +201,10 @@ static inline LONG InterlockedIncrement(LONG volatile *x) { return __sync_add_and_fetch(x, 1); } +static inline LONG64 InterlockedIncrement64(LONG64 volatile *x) +{ + return __sync_add_and_fetch(x, 1); +} static inline LONG InterlockedAdd(LONG volatile *x, LONG val) { return __sync_add_and_fetch(x, val); diff --git a/include/vkd3d_windows.h b/include/vkd3d_windows.h index c1aa9f27..e2629c2c 100644 --- a/include/vkd3d_windows.h +++ b/include/vkd3d_windows.h @@ -91,6 +91,7 @@ typedef unsigned __int64 UINT64; typedef int64_t DECLSPEC_ALIGN(8) INT64; typedef uint64_t DECLSPEC_ALIGN(8) UINT64; # endif +typedef INT64 LONG64; typedef long LONG_PTR; typedef unsigned long ULONG_PTR;
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index e7375fb8..104a0c54 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1921,6 +1921,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l bindings->descriptor_set_count = 0; bindings->descriptor_table_dirty_mask = bindings->descriptor_table_active_mask & bindings->root_signature->descriptor_table_mask; bindings->push_descriptor_dirty_mask = bindings->push_descriptor_active_mask & bindings->root_signature->push_descriptor_mask; + bindings->cbv_srv_uav_heap_id = 0; + bindings->sampler_heap_id = 0; }
static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, @@ -3021,6 +3023,146 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis d3d12_command_list_update_uav_counter_descriptors(list, bind_point); }
+static unsigned int d3d12_command_list_bind_descriptor_table(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings, unsigned int index, + struct d3d12_descriptor_heap **cbv_srv_uav_heap, struct d3d12_descriptor_heap **sampler_heap) +{ + struct d3d12_descriptor_heap *heap; + const struct d3d12_desc *desc; + unsigned int offset; + + if (!(desc = bindings->descriptor_tables[index])) + return 0; + + /* AMD, Nvidia and Intel drivers on Windows work if SetDescriptorHeaps() + * is not called, so we bind heaps from the tables instead. No NULL check is + * needed here because it's checked when descriptor tables are set. */ + heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator, desc); + offset = desc - (const struct d3d12_desc *)heap->descriptors; + + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + if (*cbv_srv_uav_heap) + { + if (heap == *cbv_srv_uav_heap) + return offset; + /* This occurs occasionally in Rise of the Tomb Raider apparently due to a race + * condition (one of several), but adding a mutex for table updates has no effect. 
*/ + WARN("List %p uses descriptors from more than one CBV/SRV/UAV heap.\n", list); + } + *cbv_srv_uav_heap = heap; + } + else + { + if (*sampler_heap) + { + if (heap == *sampler_heap) + return offset; + WARN("List %p uses descriptors from more than one sampler heap.\n", list); + } + *sampler_heap = heap; + } + + return offset; +} + +static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings, struct d3d12_descriptor_heap **cbv_srv_uav_heap, + struct d3d12_descriptor_heap **sampler_heap) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + unsigned int offsets[D3D12_MAX_ROOT_COST]; + unsigned int i, j; + + for (i = 0, j = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i) + { + if (!(rs->descriptor_table_mask & ((uint64_t)1 << i))) + continue; + offsets[j++] = d3d12_command_list_bind_descriptor_table(list, bindings, i, + cbv_srv_uav_heap, sampler_heap); + } + if (j) + { + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, rs->vk_pipeline_layout, VK_SHADER_STAGE_ALL, + rs->descriptor_table_offset, j * sizeof(uint32_t), offsets)); + } +} + +static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + enum vkd3d_vk_descriptor_set_index set; + + if (!heap) + return; + + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + if (heap->serial_id == bindings->cbv_srv_uav_heap_id) + return; + bindings->cbv_srv_uav_heap_id = heap->serial_id; + } + else + { + if (heap->serial_id == bindings->sampler_heap_id) + return; + bindings->sampler_heap_id = heap->serial_id; + 
} + + /* These sets can be shared across multiple command lists, and therefore binding must + * be synchronised. On an experimental branch in which caching of Vk descriptor writes + * greatly increased the chance of multiple threads arriving here at the same time, + * GRID 2019 crashed without the mutex lock. */ + vkd3d_mutex_lock(&heap->vk_sets_mutex); + + for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) + { + VkDescriptorSet vk_descriptor_set = heap->vk_descriptor_sets[set].vk_set; + + if (!vk_descriptor_set) + continue; + + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout, + rs->vk_set_count + set, 1, &vk_descriptor_set, 0, NULL)); + } + + vkd3d_mutex_unlock(&heap->vk_sets_mutex); +} + +static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + struct d3d12_descriptor_heap *cbv_srv_uav_heap = NULL, *sampler_heap = NULL; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + + if (!rs) + return; + + if (bindings->descriptor_table_dirty_mask || bindings->push_descriptor_dirty_mask) + d3d12_command_list_prepare_descriptors(list, bind_point); + if (bindings->descriptor_table_dirty_mask) + d3d12_command_list_update_descriptor_tables(list, bindings, &cbv_srv_uav_heap, &sampler_heap); + bindings->descriptor_table_dirty_mask = 0; + + d3d12_command_list_update_push_descriptors(list, bind_point); + + if (bindings->descriptor_set_count) + { + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout, + rs->main_set, bindings->descriptor_set_count, bindings->descriptor_sets, 0, NULL)); + bindings->in_use = true; + } + + d3d12_command_list_bind_descriptor_heap(list, bind_point, cbv_srv_uav_heap); + 
d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap); +} + static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list); @@ -3028,7 +3170,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l if (!d3d12_command_list_update_compute_pipeline(list)) return false;
- d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE);
return true; } @@ -3045,7 +3187,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list if (!d3d12_command_list_update_current_framebuffer(list)) return false;
- d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); + list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS);
if (list->current_render_pass != VK_NULL_HANDLE) return true; @@ -4113,6 +4255,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12Graphi TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps);
/* Our current implementation does not need this method. + * In Windows it doesn't need to be called at all for correct operation, and + * at least on AMD the wrong heaps can be set here and tests still succeed. * * It could be used to validate descriptor tables but we do not have an * equivalent of the D3D12 Debug Layer. */ @@ -5706,6 +5850,9 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d
list->allocator = allocator;
+ list->update_descriptors = device->use_vk_heaps ? d3d12_command_list_update_heap_descriptors + : d3d12_command_list_update_descriptors; + if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) { list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS].vk_uav_counter_views = NULL; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 59fa9af9..e00ac853 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -142,6 +142,112 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_VERTEX_ATTRIBUTE_DIVISOR, EXT_vertex_attribute_divisor), };
+static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *device, unsigned int index) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags_info; + VkDescriptorSetLayoutCreateInfo set_desc; + VkDescriptorBindingFlagsEXT set_flags; + VkDescriptorSetLayoutBinding binding; + VkResult vr; + + binding.binding = 0; + binding.descriptorType = device->vk_descriptor_heap_layouts[index].type; + binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count; + binding.stageFlags = VK_SHADER_STAGE_ALL; + binding.pImmutableSamplers = NULL; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + set_desc.pNext = &flags_info; + set_desc.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT; + set_desc.bindingCount = 1; + set_desc.pBindings = &binding; + + set_flags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT + | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT + | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT; + + flags_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + flags_info.pNext = NULL; + flags_info.bindingCount = 1; + flags_info.pBindingFlags = &set_flags; + + if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_desc, NULL, + &device->vk_descriptor_heap_layouts[index].vk_set_layout))) < 0) + { + WARN("Failed to create Vulkan descriptor set layout, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static void vkd3d_vk_descriptor_heap_layouts_cleanup(struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + enum vkd3d_vk_descriptor_set_index set; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, device->vk_descriptor_heap_layouts[set].vk_set_layout, + 
NULL)); +} + +static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device) +{ + static const struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] = + { + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, + /* UAV counters */ + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + }; + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; + enum vkd3d_vk_descriptor_set_index set; + HRESULT hr; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + device->vk_descriptor_heap_layouts[set] = vk_descriptor_heap_layouts[set]; + + if (!device->use_vk_heaps) + return S_OK; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + { + switch (device->vk_descriptor_heap_layouts[set].type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + device->vk_descriptor_heap_layouts[set].count = limits->uniform_buffer_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + device->vk_descriptor_heap_layouts[set].count = limits->sampled_image_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + device->vk_descriptor_heap_layouts[set].count = limits->storage_image_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + device->vk_descriptor_heap_layouts[set].count = limits->sampler_max_descriptors; + break; + default: + 
ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); + break; + } + + if (FAILED(hr = vkd3d_create_vk_descriptor_heap_layout(device, set))) + { + vkd3d_vk_descriptor_heap_layouts_cleanup(device); + return hr; + } + } + + return S_OK; +} + static unsigned int get_spec_version(const VkExtensionProperties *extensions, unsigned int count, const char *extension_name) { @@ -431,6 +537,7 @@ static void vkd3d_init_debug_report(struct vkd3d_instance *instance)
static const struct vkd3d_debug_option vkd3d_config_options[] = { + {"virtual_heaps", VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS}, /* always use virtual descriptor heaps */ {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */ };
@@ -1287,6 +1394,36 @@ static void vkd3d_device_descriptor_limits_init(struct vkd3d_device_descriptor_l limits->sampler_max_descriptors = min(device_limits->maxDescriptorSetSamplers, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); }
+/* Fill 'limits' for the Vulkan-heap path from the update-after-bind limits in 'properties'. Each value is the smaller of the per-set limit and the per-stage limit reduced by root_provision; the per-stage sampled and storage image limits are first divided when the device limit is large enough. */ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_descriptor_limits *limits, + const VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties) +{ + /* Subtracted from every per-stage limit below. NOTE(review): presumably headroom for root descriptors - confirm. */ const unsigned int root_provision = D3D12_MAX_ROOT_COST / 2; + unsigned int srv_divisor = 1, uav_divisor = 1; + + /* The total number of populated sampled image or storage image descriptors never exceeds the size of + * one set (or two sets if every UAV has a counter), but the total size of bound layouts will exceed + * device limits if each set size is maxDescriptorSet*, because of the D3D12 buffer + image allowance + * (and UAV counters). Breaking limits for layouts seems to work with RADV and Nvidia drivers at + * least, but let's try to stay within them if limits are high enough. */ + if (properties->maxDescriptorSetUpdateAfterBindSampledImages >= (1u << 21)) + { + srv_divisor = 2; + uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 3 : 2; + } + + /* NOTE(review): these subtractions are unsigned; a per-stage limit smaller than root_provision would wrap to a huge value and min() would then select the per-set limit. Confirm that devices exposing descriptor indexing always report larger per-stage limits. */ limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, + properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision); + limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages, + properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision); + limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, + properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision); + limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages, + properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision); + limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers, + properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision); + /* Samplers are additionally capped at VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS. */ limits->sampler_max_descriptors = 
min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); +} + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info, struct vkd3d_physical_device_info *physical_device_info, @@ -1514,8 +1651,20 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, features->robustBufferAccess = VK_FALSE; }
- vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->properties2.properties.limits); + /* Select descriptor heap implementation. Forcing virtual heaps may be useful if + * a client allocates descriptor heaps too large for the Vulkan device, or the + * root signature cost exceeds the available push constant size. Virtual heaps + * use only enough descriptors for the descriptor tables of the currently bound + * root signature, and don't require a 32-bit push constant for each table. */ + device->use_vk_heaps = vulkan_info->EXT_descriptor_indexing + && !(device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS); + + if (device->use_vk_heaps) + vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->descriptor_indexing_properties); + else + vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->properties2.properties.limits);
return S_OK; } @@ -2504,6 +2653,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_private_store_destroy(&device->private_store);
vkd3d_cleanup_format_info(device); + vkd3d_vk_descriptor_heap_layouts_cleanup(device); vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); vkd3d_destroy_null_resources(&device->null_resources, device); vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator); @@ -4005,6 +4155,9 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_uav_clear_state_init(&device->uav_clear_state, device))) goto out_destroy_null_resources;
+ if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device))) + goto out_cleanup_uav_clear_state; + vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); @@ -4020,6 +4173,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
return S_OK;
+out_cleanup_uav_clear_state: + vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); out_destroy_null_resources: vkd3d_destroy_null_resources(&device->null_resources, device); out_cleanup_format_info: diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 4c48e22e..e5827955 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2128,6 +2128,58 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); }
+/* Write the Vulkan descriptor for 'src' into the descriptor set of the heap containing 'dst', at the array index of 'dst' within that heap. dst and src contain the same data unless another thread overwrites dst. The array index is + * calculated from dst, and src is thread safe. */ +static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set; + struct d3d12_descriptor_heap *descriptor_heap; + const struct vkd3d_vk_device_procs *vk_procs; + + descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, dst); + descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + src->vk_descriptor_type)]; + vk_procs = &device->vk_procs; + + /* The write structure and image info are stored in the heap and modified in place below, so the whole update runs under the heap's vk_sets_mutex. */ vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); + + /* Index of dst within the heap's descriptor array. */ descriptor_set->vk_descriptor_write.dstArrayElement = dst + - (const struct d3d12_desc *)descriptor_heap->descriptors; + switch (src->vk_descriptor_type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + descriptor_set->vk_descriptor_write.pBufferInfo = &src->u.vk_cbv_info; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + descriptor_set->vk_image_info.imageView = src->u.view->u.vk_image_view; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->u.vk_buffer_view; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + descriptor_set->vk_image_info.sampler = src->u.view->u.vk_sampler; + break; + default: + ERR("Unhandled descriptor type %#x.\n", src->vk_descriptor_type); + break; + } + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + + /* A UAV with a counter view is also written to the UAV counter set, at the same array index. */ if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view->vk_counter_view) + { + descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; + descriptor_set->vk_descriptor_write.dstArrayElement = dst + - (const struct d3d12_desc 
*)descriptor_heap->descriptors; + descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->vk_counter_view; + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + } + + vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); +} + void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -2149,6 +2201,9 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr /* Destroy the view after unlocking to reduce wait time. */ if (defunct_view) vkd3d_view_destroy(defunct_view, device); + + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, src, device); }
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) @@ -3425,9 +3480,12 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea
if (!refcount) { + const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_device *device = heap->device; unsigned int i;
+ vk_procs = &device->vk_procs; + vkd3d_private_store_destroy(&heap->private_store);
switch (heap->desc.Type) @@ -3474,6 +3532,9 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea break; }
+ VK_CALL(vkDestroyDescriptorPool(device->vk_device, heap->vk_descriptor_pool, NULL)); + vkd3d_mutex_destroy(&heap->vk_sets_mutex); + vkd3d_free(heap);
d3d12_device_release(device); @@ -3584,19 +3645,155 @@ static const struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl = d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart, };
+/* Maps a VkDescriptorType value, used as the array index, to the heap descriptor set index. VKD3D_SET_INDEX_COUNT marks a descriptor type that has no heap set. */ const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[] = +{ + VKD3D_SET_INDEX_SAMPLER, + VKD3D_SET_INDEX_COUNT, + VKD3D_SET_INDEX_SAMPLED_IMAGE, + VKD3D_SET_INDEX_STORAGE_IMAGE, + VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER, + VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER, + VKD3D_SET_INDEX_UNIFORM_BUFFER, +}; + +/* Create the heap's Vulkan descriptor pool. One pool size of desc->NumDescriptors is added for every device heap layout whose applicable heap type equals desc->Type, and maxSets is the number of such layouts. Returns the HRESULT converted from the Vulkan result. */ static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorPoolSize pool_sizes[VKD3D_SET_INDEX_COUNT]; + struct VkDescriptorPoolCreateInfo pool_desc; + VkDevice vk_device = device->vk_device; + enum vkd3d_vk_descriptor_set_index set; + VkResult vr; + + /* poolSizeCount doubles as the write cursor into pool_sizes. */ for (set = 0, pool_desc.poolSizeCount = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + { + if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type) + { + pool_sizes[pool_desc.poolSizeCount].type = device->vk_descriptor_heap_layouts[set].type; + pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors; + } + } + + pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_desc.pNext = NULL; + pool_desc.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT; + pool_desc.maxSets = pool_desc.poolSizeCount; + pool_desc.pPoolSizes = pool_sizes; + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &descriptor_heap->vk_descriptor_pool))) < 0) + ERR("Failed to create descriptor pool, vr %d.\n", vr); + + return hresult_from_vk_result(vr); +} + +/* Allocate the Vulkan descriptor set with index 'set' from the heap's pool, using the heap's NumDescriptors as the variable descriptor count, and record it as the destination set of the stored write structure. */ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, unsigned int set) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + uint32_t variable_binding_size = descriptor_heap->desc.NumDescriptors; + const struct vkd3d_vk_device_procs *vk_procs = 
&device->vk_procs; + VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; + VkDescriptorSetAllocateInfo set_desc; + VkResult vr; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_desc.pNext = &set_size; + set_desc.descriptorPool = descriptor_heap->vk_descriptor_pool; + set_desc.descriptorSetCount = 1; + set_desc.pSetLayouts = &device->vk_descriptor_heap_layouts[set].vk_set_layout; + set_size.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT; + set_size.pNext = NULL; + set_size.descriptorSetCount = 1; + set_size.pDescriptorCounts = &variable_binding_size; + if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) + { + descriptor_set->vk_descriptor_write.dstSet = descriptor_set->vk_set; + return S_OK; + } + + ERR("Failed to allocate descriptor set, vr %d.\n", vr); + return hresult_from_vk_result(vr); +} + +/* Initialise the heap's Vulkan state. The pool handle, per-set structures and mutex are always initialised; the pool and descriptor sets are only created when the device uses Vulkan heaps and the heap type is CBV_SRV_UAV or SAMPLER. On failure the pool and mutex are left for the caller to release. */ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + enum vkd3d_vk_descriptor_set_index set; + HRESULT hr; + + descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; + memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); + vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); + + if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) + return S_OK; + + if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_pool(descriptor_heap, device, desc))) + return hr; + + for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) + { + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + + /* Pre-fill the write structure for a single descriptor at binding 0; later writes only change the array element and the per-type payload. */ descriptor_set->vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set->vk_descriptor_write.pNext = NULL; + 
descriptor_set->vk_descriptor_write.dstBinding = 0; + descriptor_set->vk_descriptor_write.descriptorCount = 1; + descriptor_set->vk_descriptor_write.descriptorType = device->vk_descriptor_heap_layouts[set].type; + descriptor_set->vk_descriptor_write.pImageInfo = NULL; + descriptor_set->vk_descriptor_write.pBufferInfo = NULL; + descriptor_set->vk_descriptor_write.pTexelBufferView = NULL; + switch (device->vk_descriptor_heap_layouts[set].type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.imageView = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + break; + default: + ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); + return E_FAIL; + } + /* Only sets whose layout applies to this heap type get a Vulkan descriptor set. */ if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type + && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set))) + return hr; + } + + return S_OK; +} + static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) { + static LONG64 serial_id; HRESULT hr;
descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl; descriptor_heap->refcount = 1; + descriptor_heap->serial_id = InterlockedIncrement64(&serial_id);
descriptor_heap->desc = *desc;
if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) return hr;
+    /* Propagate failure to create the Vulkan pool or descriptor sets instead of
+     * returning a heap with null sets. The pool handle is VK_NULL_HANDLE unless
+     * pool creation succeeded, and the mutex is always initialised by the callee,
+     * so both can be released unconditionally here. */
+    if (FAILED(hr = d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc)))
+    {
+        const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+
+        VK_CALL(vkDestroyDescriptorPool(device->vk_device, descriptor_heap->vk_descriptor_pool, NULL));
+        vkd3d_mutex_destroy(&descriptor_heap->vk_sets_mutex);
+        vkd3d_private_store_destroy(&descriptor_heap->private_store);
+        return hr;
+    }
+
     d3d12_device_add_ref(descriptor_heap->device = device);
return S_OK; diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 91f6e27b..2deaa928 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -92,6 +92,8 @@ static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signa if (root_signature->descriptor_mapping) vkd3d_free(root_signature->descriptor_mapping); vkd3d_free(root_signature->descriptor_offsets); + vkd3d_free(root_signature->uav_counter_mapping); + vkd3d_free(root_signature->uav_counter_offsets); if (root_signature->root_constants) vkd3d_free(root_signature->root_constants);
@@ -327,6 +329,7 @@ static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutB struct d3d12_root_signature_info { size_t binding_count; + size_t uav_range_count;
size_t root_constant_count; size_t root_descriptor_count; @@ -401,6 +404,7 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig info->binding_count += binding_count; info->uav_count += count * 2u; uav_unbounded_range |= unbounded; + ++info->uav_range_count; break; case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: info->cbv_count += count; @@ -495,6 +499,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat uint32_t *push_constant_range_count) { uint32_t push_constants_offset[D3D12_SHADER_VISIBILITY_PIXEL + 1]; + bool use_vk_heaps = root_signature->device->use_vk_heaps; unsigned int i, j, push_constant_count; uint32_t offset;
@@ -507,7 +512,8 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat continue;
assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); - push_constants[p->ShaderVisibility].stageFlags = stage_flags_from_visibility(p->ShaderVisibility); + push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL + : stage_flags_from_visibility(p->ShaderVisibility); push_constants[p->ShaderVisibility].size += p->u.Constants.Num32BitValues * sizeof(uint32_t); } if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size) @@ -586,6 +592,8 @@ struct vkd3d_descriptor_set_context unsigned int table_index; unsigned int unbounded_offset; unsigned int descriptor_index; + unsigned int uav_counter_index; + unsigned int push_constant_index; uint32_t descriptor_binding; };
@@ -595,6 +603,7 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns
if (set_count > max_count) { + /* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */ ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); return false; } @@ -802,6 +811,122 @@ static void d3d12_root_signature_map_vk_unbounded_binding(struct d3d12_root_sign offset->dynamic_offset_index = ~0u; }
+/* Return the binding count to use for 'range' in a heap set holding descriptor_set_size descriptors. Bounded ranges return their declared count (an error is logged if it does not fit); unbounded ranges return the space remaining after range->offset, but never less than 1. */ static unsigned int vk_heap_binding_count_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + unsigned int descriptor_set_size) +{ + unsigned int max_count; + + if (descriptor_set_size <= range->offset) + { + ERR("Descriptor range offset %u exceeds maximum available offset %u.\n", range->offset, descriptor_set_size - 1); + max_count = 0; + } + else + { + max_count = descriptor_set_size - range->offset; + } + + if (range->descriptor_count != UINT_MAX) + { + if (range->descriptor_count > max_count) + ERR("Range size %u exceeds available descriptor count %u.\n", range->descriptor_count, max_count); + return range->descriptor_count; + } + else + { + /* Prefer an unsupported binding count vs a zero count, because shader compilation will fail + * to match a declaration to a zero binding, resulting in failure of pipeline state creation. */ + return max_count + !max_count; + } +} + +/* Fill 'binding' for 'range': the heap set chosen from the range type and is_buffer, offset by the root signature's own set count, binding 0, and a count bounded by the matching device descriptor limit. */ static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + bool is_buffer, const struct d3d12_root_signature *root_signature, + struct vkd3d_shader_descriptor_binding *binding) +{ + const struct vkd3d_device_descriptor_limits *descriptor_limits = &root_signature->device->vk_info.descriptor_limits; + unsigned int descriptor_set_size; + + switch (range->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + binding->set = is_buffer ? VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER : VKD3D_SET_INDEX_SAMPLED_IMAGE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + binding->set = is_buffer ? 
VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER : VKD3D_SET_INDEX_STORAGE_IMAGE; + descriptor_set_size = descriptor_limits->storage_image_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + binding->set = VKD3D_SET_INDEX_UNIFORM_BUFFER; + descriptor_set_size = descriptor_limits->uniform_buffer_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + binding->set = VKD3D_SET_INDEX_SAMPLER; + descriptor_set_size = descriptor_limits->sampler_max_descriptors; + break; + default: + /* Unknown range types fall back to the sampled image set. */ FIXME("Unhandled descriptor range type %#x.\n", range->type); + binding->set = VKD3D_SET_INDEX_SAMPLED_IMAGE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + break; + } + /* Heap sets are numbered after the root signature's own descriptor sets. */ binding->set += root_signature->vk_set_count; + binding->binding = 0; + binding->count = vk_heap_binding_count_from_descriptor_range(range, descriptor_set_size); +} + +/* Append one resource binding and its descriptor offset for 'range' at context->descriptor_index, then advance that index. The dynamic offset index is the current push constant index. */ static void d3d12_root_signature_map_vk_heap_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, bool buffer_descriptor, + enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_resource_binding *mapping = &root_signature->descriptor_mapping[context->descriptor_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->descriptor_offsets[context->descriptor_index++]; + + mapping->type = range->type; + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + vkd3d_descriptor_heap_binding_from_descriptor_range(range, buffer_descriptor, root_signature, &mapping->binding); + offset->static_offset = range->offset; + offset->dynamic_offset_index = context->push_constant_index; +} + +/* Append one UAV counter binding and its descriptor offset for 'range' at context->uav_counter_index, then advance that index. The binding targets the UAV counter heap set and is bounded by the storage image descriptor limit. */ static void d3d12_root_signature_map_vk_heap_uav_counter(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility, + struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_uav_counter_binding *mapping = &root_signature->uav_counter_mapping[context->uav_counter_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->uav_counter_offsets[context->uav_counter_index++]; + + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->binding.set = root_signature->vk_set_count + VKD3D_SET_INDEX_UAV_COUNTER; + mapping->binding.binding = 0; + mapping->binding.count = vk_heap_binding_count_from_descriptor_range(range, + root_signature->device->vk_info.descriptor_limits.storage_image_max_descriptors); + offset->static_offset = range->offset; + offset->dynamic_offset_index = context->push_constant_index; +} + +/* Map 'range' onto the heap sets. SRV and UAV ranges are mapped twice, first as buffer and then as image descriptors, and UAV ranges also get a counter mapping. CBV ranges are mapped once as buffers and sampler ranges once with the image flag. */ static void d3d12_root_signature_map_descriptor_heap_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility, + struct vkd3d_descriptor_set_context *context) +{ + bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + { + d3d12_root_signature_map_vk_heap_binding(root_signature, range, true, shader_visibility, context); + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + d3d12_root_signature_map_vk_heap_uav_counter(root_signature, 
range, shader_visibility, context); + } + + d3d12_root_signature_map_vk_heap_binding(root_signature, range, is_buffer, shader_visibility, context); +} + static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_root_signature *root_signature, const struct d3d12_root_descriptor_table_range *range, unsigned int descriptor_offset, enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) @@ -868,6 +993,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo struct vkd3d_descriptor_set_context *context) { const struct d3d12_device *device = root_signature->device; + bool use_vk_heaps = root_signature->device->use_vk_heaps; struct d3d12_root_descriptor_table *table; unsigned int i, j, k, range_count; uint32_t vk_binding; @@ -935,6 +1061,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
range = &table->ranges[j];
+ if (use_vk_heaps) + { + /* set, binding and vk_binding_count are not used. */ + range->set = 0; + range->binding = 0; + range->vk_binding_count = 0; + d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context); + continue; + } + range->set = root_signature->vk_set_count - root_signature->main_set;
if (root_signature->use_descriptor_arrays) @@ -1014,6 +1150,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
context->current_binding = cur_binding; } + ++context->push_constant_index; }
return S_OK; @@ -1084,9 +1221,36 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa }
context->current_binding = cur_binding; + if (device->use_vk_heaps) + return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); + return S_OK; }
+/* Reserve one 32-bit push constant per descriptor table, counted by context->push_constant_index, after the existing contents of the D3D12_SHADER_VISIBILITY_ALL push constant range. Records the table count and the 16-byte aligned offset in the root signature; does nothing more when there are no tables. */ static void d3d12_root_signature_init_descriptor_table_push_constants(struct d3d12_root_signature *root_signature, + const struct vkd3d_descriptor_set_context *context) +{ + root_signature->descriptor_table_offset = 0; + if ((root_signature->descriptor_table_count = context->push_constant_index)) + { + VkPushConstantRange *range = &root_signature->push_constant_ranges[D3D12_SHADER_VISIBILITY_ALL]; + + root_signature->descriptor_table_offset = align(range->size, 16); + range->size = root_signature->descriptor_table_offset + + root_signature->descriptor_table_count * sizeof(uint32_t); + + /* Exceeding the device limit is only reported; the oversized range is kept. */ if (range->size > root_signature->device->vk_info.device_limits.maxPushConstantsSize) + FIXME("Push constants size %u exceeds maximum allowed size %u. Try VKD3D_CONFIG=virtual_heaps.\n", + range->size, root_signature->device->vk_info.device_limits.maxPushConstantsSize); + + /* If the root signature had no push constant ranges, this range becomes the only one and is visible to all stages. */ if (!root_signature->push_constant_range_count) + { + root_signature->push_constant_range_count = 1; + range->stageFlags = VK_SHADER_STAGE_ALL; + } + } +} + static bool vk_binding_uses_partial_binding(const VkDescriptorSetLayoutBinding *binding) { if (binding->descriptorCount == 1) @@ -1194,11 +1358,19 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, VkDescriptorSetLayout *vk_set_layouts) { + const struct d3d12_device *device = root_signature->device; + enum vkd3d_vk_descriptor_set_index set; unsigned int i;
for (i = 0; i < root_signature->vk_set_count; ++i) vk_set_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout;
+ if (device->use_vk_heaps) + { + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + vk_set_layouts[i++] = device->vk_descriptor_heap_layouts[set].vk_set_layout; + } + return i; }
@@ -1210,6 +1382,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa struct vkd3d_descriptor_set_context context; VkDescriptorSetLayoutBinding *binding_desc; struct d3d12_root_signature_info info; + bool use_vk_heaps; unsigned int i; HRESULT hr;
@@ -1226,6 +1399,8 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa root_signature->flags = desc->Flags; root_signature->descriptor_mapping = NULL; root_signature->descriptor_offsets = NULL; + root_signature->uav_counter_mapping = NULL; + root_signature->uav_counter_offsets = NULL; root_signature->static_sampler_count = 0; root_signature->static_samplers = NULL; root_signature->device = device; @@ -1243,9 +1418,13 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa }
root_signature->binding_count = info.binding_count; + root_signature->uav_mapping_count = info.uav_range_count; root_signature->static_sampler_count = desc->NumStaticSamplers; root_signature->root_descriptor_count = info.root_descriptor_count; root_signature->use_descriptor_arrays = device->vk_info.EXT_descriptor_indexing; + root_signature->descriptor_table_count = 0; + + use_vk_heaps = device->use_vk_heaps;
hr = E_OUTOFMEMORY; root_signature->parameter_count = desc->NumParameters; @@ -1255,6 +1434,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa if (!(root_signature->descriptor_mapping = vkd3d_calloc(root_signature->binding_count, sizeof(*root_signature->descriptor_mapping)))) goto fail; + if (use_vk_heaps && (!(root_signature->uav_counter_mapping = vkd3d_calloc(root_signature->uav_mapping_count, + sizeof(*root_signature->uav_counter_mapping))) + || !(root_signature->uav_counter_offsets = vkd3d_calloc(root_signature->uav_mapping_count, + sizeof(*root_signature->uav_counter_offsets))))) + goto fail; if (root_signature->use_descriptor_arrays && !(root_signature->descriptor_offsets = vkd3d_calloc( root_signature->binding_count, sizeof(*root_signature->descriptor_offsets)))) goto fail; @@ -1289,8 +1473,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa goto fail; if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, device, desc, &context))) goto fail; + context.push_constant_index = 0; if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context))) goto fail; + if (use_vk_heaps) + d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context);
if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) goto fail; @@ -1968,6 +2155,9 @@ static HRESULT d3d12_pipeline_state_find_and_init_uav_counters(struct d3d12_pipe HRESULT hr; int ret;
+ if (device->use_vk_heaps) + return S_OK; + shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; shader_info.next = NULL; if ((ret = vkd3d_scan_dxbc(code, &shader_info)) < 0) @@ -2020,10 +2210,10 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; offset_info.next = NULL; - offset_info.descriptor_table_offset = 0; - offset_info.descriptor_table_count = 0; + offset_info.descriptor_table_offset = root_signature->descriptor_table_offset; + offset_info.descriptor_table_count = root_signature->descriptor_table_count; offset_info.binding_offsets = root_signature->descriptor_offsets; - offset_info.uav_counter_offsets = NULL; + offset_info.uav_counter_offsets = root_signature->uav_counter_offsets; vkd3d_prepend_struct(&target_info, &offset_info); }
@@ -2035,8 +2225,16 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st shader_interface.push_constant_buffer_count = root_signature->root_constant_count; shader_interface.combined_samplers = NULL; shader_interface.combined_sampler_count = 0; - shader_interface.uav_counters = state->uav_counters.bindings; - shader_interface.uav_counter_count = state->uav_counters.binding_count; + if (root_signature->uav_counter_mapping) + { + shader_interface.uav_counters = root_signature->uav_counter_mapping; + shader_interface.uav_counter_count = root_signature->uav_mapping_count; + } + else + { + shader_interface.uav_counters = state->uav_counters.bindings; + shader_interface.uav_counter_count = state->uav_counters.binding_count; + }
vk_pipeline_layout = state->uav_counters.vk_pipeline_layout ? state->uav_counters.vk_pipeline_layout : root_signature->vk_pipeline_layout; @@ -2797,10 +2995,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s { offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; offset_info.next = NULL; - offset_info.descriptor_table_offset = 0; - offset_info.descriptor_table_count = 0; + offset_info.descriptor_table_offset = root_signature->descriptor_table_offset; + offset_info.descriptor_table_count = root_signature->descriptor_table_count; offset_info.binding_offsets = root_signature->descriptor_offsets; - offset_info.uav_counter_offsets = NULL; + offset_info.uav_counter_offsets = root_signature->uav_counter_offsets; }
for (i = 0; i < ARRAY_SIZE(shader_stages); ++i) @@ -2842,8 +3040,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s break;
case VK_SHADER_STAGE_FRAGMENT_BIT: - shader_interface.uav_counters = state->uav_counters.bindings; - shader_interface.uav_counter_count = state->uav_counters.binding_count; + shader_interface.uav_counters = root_signature->uav_counter_mapping + ? root_signature->uav_counter_mapping : state->uav_counters.bindings; + shader_interface.uav_counter_count = root_signature->uav_counter_mapping + ? root_signature->uav_mapping_count : state->uav_counters.binding_count; stage_target_info = &ps_target_info; break;
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index ca7a3f22..323efd22 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -158,6 +158,7 @@ struct vkd3d_vulkan_info enum vkd3d_config_flags { VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, + VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS = 0x00000002, };
struct vkd3d_instance @@ -774,11 +775,51 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc);
+enum vkd3d_vk_descriptor_set_index +{ + VKD3D_SET_INDEX_UNIFORM_BUFFER = 0, + VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER = 1, + VKD3D_SET_INDEX_SAMPLED_IMAGE = 2, + VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER = 3, + VKD3D_SET_INDEX_STORAGE_IMAGE = 4, + VKD3D_SET_INDEX_SAMPLER = 5, + VKD3D_SET_INDEX_UAV_COUNTER = 6, + VKD3D_SET_INDEX_COUNT = 7 +}; + +extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; + +static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + VkDescriptorType type) +{ + assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); + assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); + + return vk_descriptor_set_index_table[type]; +} + +struct vkd3d_vk_descriptor_heap_layout +{ + VkDescriptorType type; + bool buffer_dimension; + D3D12_DESCRIPTOR_HEAP_TYPE applicable_heap_type; + unsigned int count; + VkDescriptorSetLayout vk_set_layout; +}; + +struct d3d12_descriptor_heap_vk_set +{ + VkDescriptorSet vk_set; + VkDescriptorImageInfo vk_image_info; + VkWriteDescriptorSet vk_descriptor_write; +}; + /* ID3D12DescriptorHeap */ struct d3d12_descriptor_heap { ID3D12DescriptorHeap ID3D12DescriptorHeap_iface; LONG refcount; + uint64_t serial_id;
D3D12_DESCRIPTOR_HEAP_DESC desc;
@@ -786,6 +827,10 @@ struct d3d12_descriptor_heap
struct vkd3d_private_store private_store;
+ VkDescriptorPool vk_descriptor_pool; + struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT]; + struct vkd3d_mutex vk_sets_mutex; + BYTE descriptors[]; };
@@ -900,8 +945,13 @@ struct d3d12_root_signature D3D12_ROOT_SIGNATURE_FLAGS flags;
unsigned int binding_count; + unsigned int uav_mapping_count; struct vkd3d_shader_resource_binding *descriptor_mapping; struct vkd3d_shader_descriptor_offset *descriptor_offsets; + struct vkd3d_shader_uav_counter_binding *uav_counter_mapping; + struct vkd3d_shader_descriptor_offset *uav_counter_offsets; + unsigned int descriptor_table_offset; + unsigned int descriptor_table_count;
unsigned int root_constant_count; struct vkd3d_shader_push_constant_buffer *root_constants; @@ -1120,6 +1170,8 @@ struct vkd3d_pipeline_bindings struct d3d12_desc *descriptor_tables[D3D12_MAX_ROOT_COST]; uint64_t descriptor_table_dirty_mask; uint64_t descriptor_table_active_mask; + uint64_t cbv_srv_uav_heap_id; + uint64_t sampler_heap_id;
VkBufferView *vk_uav_counter_views; size_t vk_uav_counter_views_size; @@ -1181,6 +1233,8 @@ struct d3d12_command_list VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];
+ void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); + struct vkd3d_private_store private_store; };
@@ -1372,6 +1426,8 @@ struct d3d12_device struct vkd3d_uav_clear_state uav_clear_state;
VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; + struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; + bool use_vk_heaps; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance, diff --git a/tests/d3d12.c b/tests/d3d12.c index eb6217bc..68143828 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -16756,13 +16756,15 @@ static void test_update_descriptor_tables(void) destroy_test_context(&context); }
-/* This cannot be implemented reasonably in Vulkan. Vulkan doesn't allow - * updating descriptor sets after the vkCmdBindDescriptorSets() command - * is recorded. +/* This requires the Vulkan descriptor indexing extension and Vulkan-backed + * descriptor heaps. Vulkan doesn't allow updating descriptor sets after the + * vkCmdBindDescriptorSets() command is recorded unless the update-after-bind + * feature of descriptor indexing is used. */ static void test_update_descriptor_heap_after_closing_command_list(void) { ID3D12Resource *red_texture, *green_texture; + D3D12_RESOURCE_BINDING_TIER binding_tier; ID3D12GraphicsCommandList *command_list; D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; ID3D12DescriptorHeap *cpu_heap, *heap; @@ -16812,6 +16814,8 @@ static void test_update_descriptor_heap_after_closing_command_list(void) command_list = context.list; queue = context.queue;
+ binding_tier = get_resource_binding_tier(context.device); + context.root_signature = create_texture_root_signature(context.device, D3D12_SHADER_VISIBILITY_PIXEL, 0, 0); context.pipeline_state = create_pipeline_state(context.device, @@ -16873,7 +16877,8 @@ static void test_update_descriptor_heap_after_closing_command_list(void) D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); get_texture_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); value = get_readback_uint(&rb, 0, 0, 0); - todo ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value); + todo_if(binding_tier < D3D12_RESOURCE_BINDING_TIER_3) + ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value); release_resource_readback(&rb);
ID3D12DescriptorHeap_Release(cpu_heap);
On Mon, 21 Feb 2022 at 16:19, Conor McCarthy <cmccarthy@codeweavers.com> wrote:
README | 3 + include/private/vkd3d_common.h | 4 + include/vkd3d_windows.h | 1 + libs/vkd3d/command.c | 151 +++++++++++++++++++++- libs/vkd3d/device.c | 159 ++++++++++++++++++++++- libs/vkd3d/resource.c | 197 +++++++++++++++++++++++++++++ libs/vkd3d/state.c | 222 +++++++++++++++++++++++++++++++-- libs/vkd3d/vkd3d_private.h | 56 +++++++++ tests/d3d12.c | 13 +- 9 files changed, 787 insertions(+), 19 deletions(-)
This patch causes tests/texture-load-typed.shader_test to fail for me. VKD3D_CONFIG=virtual_heaps is a workaround.
March 11, 2022 8:20 PM, "Henri Verbeet" <hverbeet@gmail.com> wrote:
This patch causes tests/texture-load-typed.shader_test to fail for me. VKD3D_CONFIG=virtual_heaps is a workaround.
The old implementation enforces no constraints on how many descriptor heaps are bound, but this patch allows only one heap of each type to be bound, as per the D3D12 spec. The failing test tries to bind 3 CBV/SRV/UAV heaps. This does work on Windows, at least on AMD and Intel, but we can't reasonably support it because bindings will be a mess. One option is to keep virtual heaps as the default and add an option to use Vulkan-backed heaps.
On 3/11/22 19:59, Conor McCarthy wrote:
March 11, 2022 8:20 PM, "Henri Verbeet" hverbeet@gmail.com wrote:
This patch causes tests/texture-load-typed.shader_test to fail for me. VKD3D_CONFIG=virtual_heaps is a workaround.
The old implementation enforces no constraints on how many descriptor heaps are bound, but this patch allows only one heap of each type to be bound, as per the D3D12 spec. The failing test tries to bind 3 CBV/SRV/UAV heaps. This does work in Windows at least on AMD and Intel, but we can't reasonably support it because bindings will be a mess. One option is to keep virtual heaps as the default and add an option to use Vulkan-backed heaps.
Maybe, but it probably makes more sense just to rewrite the test to not use one heap per resource. I only wrote it that way for the sake of programming simplicity.
I'll start working on a patch.
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>
Source descriptors are copied to separate arrays to facilitate use of pre-initialised Vulkan structures, and allow arrayed writes where possible.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/device.c | 144 ++++++++++++++++++++++++++++ libs/vkd3d/resource.c | 190 +++++++++++++++++++++++++++++++------ libs/vkd3d/vkd3d_private.h | 24 ++++- 3 files changed, 329 insertions(+), 29 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index e00ac853..efa3cc94 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -3556,6 +3556,127 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
+static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], + struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) +{ + enum vkd3d_vk_descriptor_set_index set; + for (set = 0; set < VKD3D_SET_INDEX_COUNT; ++set) + { + if (!infos[set].count) + continue; + d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); + infos[set].count = 0; + infos[set].uav_counter = false; + } +} + +static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], + struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) +{ + struct d3d12_desc_copy_location *location; + enum vkd3d_vk_descriptor_set_index set; + struct vkd3d_mutex *mutex; + + mutex = d3d12_device_get_descriptor_mutex(device, src); + vkd3d_mutex_lock(mutex); + + if (src->magic == VKD3D_DESCRIPTOR_MAGIC_FREE) + { + /* Source must be unlocked first, and therefore can't be used as a null source. */ + static const struct d3d12_desc null = {0}; + vkd3d_mutex_unlock(mutex); + d3d12_desc_write_atomic(dst, &null, device); + return; + } + + set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->vk_descriptor_type); + location = &locations[set][infos[set].count++]; + + location->src = *src; + + if (location->src.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_incref(location->src.u.view); + + vkd3d_mutex_unlock(mutex); + + infos[set].uav_counter |= (location->src.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) + & !!location->src.u.view->vk_counter_view; + location->dst = dst; + + if (infos[set].count == ARRAY_SIZE(locations[0])) + { + d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); + infos[set].count = 0; + infos[set].uav_counter = false; + } +} + +/* Some games, e.g. 
Control, copy a large number of descriptors per frame, so the + * speed of this function is critical. */ +static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, + UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, + const UINT *dst_descriptor_range_sizes, + UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, + const UINT *src_descriptor_range_sizes) +{ + struct d3d12_desc_copy_location locations[VKD3D_SET_INDEX_COUNT][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; + /* The locations array is relatively large, and often mostly empty. Keeping these + * values together in a separate array will likely result in fewer cache misses. */ + struct d3d12_desc_copy_info infos[VKD3D_SET_INDEX_COUNT]; + struct d3d12_descriptor_heap *descriptor_heap = NULL; + const struct d3d12_desc *src, *heap_base, *heap_end; + unsigned int dst_range_size, src_range_size; + struct d3d12_desc *dst; + + descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, + d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[0])); + heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; + heap_end = heap_base + descriptor_heap->desc.NumDescriptors; + + memset(infos, 0, sizeof(infos)); + dst_range_idx = dst_idx = 0; + src_range_idx = src_idx = 0; + while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) + { + dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1; + src_range_size = src_descriptor_range_sizes ? 
src_descriptor_range_sizes[src_range_idx] : 1; + + dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); + src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); + + if (dst < heap_base || dst >= heap_end) + { + flush_desc_writes(locations, infos, descriptor_heap, device); + descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, + dst); + heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; + heap_end = heap_base + descriptor_heap->desc.NumDescriptors; + } + + for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++) + { + d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); + } + + if (dst_idx >= dst_range_size) + { + ++dst_range_idx; + dst_idx = 0; + } + if (src_idx >= src_range_size) + { + ++src_range_idx; + src_idx = 0; + } + } + + flush_desc_writes(locations, infos, descriptor_heap, device); +} + +#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8 + static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, @@ -3584,6 +3705,18 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, return; }
+ if (!dst_descriptor_range_count) + return; + + if (device->use_vk_heaps && (dst_descriptor_range_count > 1 || (dst_descriptor_range_sizes + && dst_descriptor_range_sizes[0] >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT))) + { + d3d12_device_vk_heaps_copy_descriptors(device, dst_descriptor_range_count, dst_descriptor_range_offsets, + dst_descriptor_range_sizes, src_descriptor_range_count, src_descriptor_range_offsets, + src_descriptor_range_sizes); + return; + } + dst_range_idx = dst_idx = 0; src_range_idx = src_idx = 0; while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) @@ -3620,6 +3753,17 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, descriptor_heap_type);
+ if (descriptor_count >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT) + { + struct d3d12_device *device = impl_from_ID3D12Device(iface); + if (device->use_vk_heaps) + { + d3d12_device_vk_heaps_copy_descriptors(device, 1, &dst_descriptor_range_offset, + &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count); + return; + } + } + d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); } diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index e5827955..7d679f02 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2128,6 +2128,53 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); }
+static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descriptor_heap_vk_set *descriptor_set, + struct d3d12_desc_copy_location *locations, unsigned int write_count) +{ + unsigned int i, info_index = 0, write_index = 0; + + switch (locations[0].src.vk_descriptor_type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + for (; write_index < write_count; ++write_index) + { + descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index]; + for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) + descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.u.vk_cbv_info; + } + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (; write_index < write_count; ++write_index) + { + descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; + for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) + descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.u.view->u.vk_image_view; + } + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (; write_index < write_count; ++write_index) + { + descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; + for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) + descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.u.view->u.vk_buffer_view; + } + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + for (; write_index < write_count; ++write_index) + { + descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; + for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) + 
descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.u.view->u.vk_sampler; + } + break; + default: + ERR("Unhandled descriptor type %#x.\n", locations[0].src.vk_descriptor_type); + break; + } +} + /* dst and src contain the same data unless another thread overwrites dst. The array index is * calculated from dst, and src is thread safe. */ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, @@ -2144,42 +2191,67 @@ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct
vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex);
- descriptor_set->vk_descriptor_write.dstArrayElement = dst + descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst - (const struct d3d12_desc *)descriptor_heap->descriptors; + descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; switch (src->vk_descriptor_type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - descriptor_set->vk_descriptor_write.pBufferInfo = &src->u.vk_cbv_info; + descriptor_set->vk_descriptor_writes[0].pBufferInfo = &src->u.vk_cbv_info; break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - descriptor_set->vk_image_info.imageView = src->u.view->u.vk_image_view; + descriptor_set->vk_image_infos[0].imageView = src->u.view->u.vk_image_view; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->u.vk_buffer_view; + descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view->u.vk_buffer_view; break; case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_image_info.sampler = src->u.view->u.vk_sampler; + descriptor_set->vk_image_infos[0].sampler = src->u.view->u.vk_sampler; break; default: ERR("Unhandled descriptor type %#x.\n", src->vk_descriptor_type); break; } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL));
if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view->vk_counter_view) { descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - descriptor_set->vk_descriptor_write.dstArrayElement = dst + descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->vk_counter_view; - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; + descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view->vk_counter_view; + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); }
vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); }
+static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) +{ + struct vkd3d_view *defunct_view; + struct vkd3d_mutex *mutex; + + mutex = d3d12_device_get_descriptor_mutex(device, dst); + vkd3d_mutex_lock(mutex); + + if (!(dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->u.view->refcount)) + { + *dst = *src; + vkd3d_mutex_unlock(mutex); + return; + } + + defunct_view = dst->u.view; + *dst = *src; + vkd3d_mutex_unlock(mutex); + + /* Destroy the view after unlocking to reduce wait time. */ + vkd3d_view_destroy(defunct_view, device); +} + void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -2213,6 +2285,56 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic d3d12_desc_write_atomic(descriptor, &null_desc, device); }
+void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, + struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, + struct d3d12_device *device) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int i, write_count; + + vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); + + for (i = 0, write_count = 0; i < info->count; ++i) + { + d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); + + if (i && locations[i].dst == locations[i - 1].dst + 1) + { + ++descriptor_set->vk_descriptor_writes[write_count - 1].descriptorCount; + continue; + } + descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst + - (const struct d3d12_desc *)descriptor_heap->descriptors; + descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; + } + d3d12_descriptor_heap_write_vk_descriptor_range(descriptor_set, locations, write_count); + /* We could pass a VkCopyDescriptorSet array instead, but that would require also storing a src array index + * for each location, which means querying the src descriptor heap. Contiguous copies require contiguous src + * descriptors as well as dst, which is less likely to occur. And client race conditions may break it. 
*/ + VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); + + if (!info->uav_counter) + goto done; + + descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; + + for (i = 0, write_count = 0; i < info->count; ++i) + { + if (!locations[i].src.u.view->vk_counter_view) + continue; + descriptor_set->vk_buffer_views[write_count] = locations[i].src.u.view->vk_counter_view; + descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; + descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst + - (const struct d3d12_desc *)descriptor_heap->descriptors; + descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; + } + VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); + +done: + vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); +} + void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -3694,6 +3816,7 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; VkDescriptorSetAllocateInfo set_desc; + unsigned int i; VkResult vr;
set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; @@ -3707,7 +3830,8 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript set_size.pDescriptorCounts = &variable_binding_size; if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) { - descriptor_set->vk_descriptor_write.dstSet = descriptor_set->vk_set; + for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) + descriptor_set->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; return S_OK; }
@@ -3735,15 +3859,18 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) { struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + unsigned int i;
- descriptor_set->vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptor_set->vk_descriptor_write.pNext = NULL; - descriptor_set->vk_descriptor_write.dstBinding = 0; - descriptor_set->vk_descriptor_write.descriptorCount = 1; - descriptor_set->vk_descriptor_write.descriptorType = device->vk_descriptor_heap_layouts[set].type; - descriptor_set->vk_descriptor_write.pImageInfo = NULL; - descriptor_set->vk_descriptor_write.pBufferInfo = NULL; - descriptor_set->vk_descriptor_write.pTexelBufferView = NULL; + for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) + { + descriptor_set->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set->vk_descriptor_writes[i].pNext = NULL; + descriptor_set->vk_descriptor_writes[i].dstBinding = 0; + descriptor_set->vk_descriptor_writes[i].descriptorType = device->vk_descriptor_heap_layouts[set].type; + descriptor_set->vk_descriptor_writes[i].pImageInfo = NULL; + descriptor_set->vk_descriptor_writes[i].pBufferInfo = NULL; + descriptor_set->vk_descriptor_writes[i].pTexelBufferView = NULL; + } switch (device->vk_descriptor_heap_layouts[set].type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: @@ -3751,19 +3878,28 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; - descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; - descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; + for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) + { + descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - 
descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; - descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; - descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; + for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) + { + descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } break; case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; - descriptor_set->vk_image_info.imageView = VK_NULL_HANDLE; - descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; + for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) + { + descriptor_set->vk_image_infos[i].imageView = VK_NULL_HANDLE; + descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + } break; default: ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 323efd22..bdaa2a12 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -807,11 +807,15 @@ struct vkd3d_vk_descriptor_heap_layout VkDescriptorSetLayout vk_set_layout; };
+#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 64 + struct d3d12_descriptor_heap_vk_set { VkDescriptorSet vk_set; - VkDescriptorImageInfo vk_image_info; - VkWriteDescriptorSet vk_descriptor_write; + VkDescriptorBufferInfo vk_buffer_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + VkBufferView vk_buffer_views[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; };
/* ID3D12DescriptorHeap */ @@ -837,6 +841,22 @@ struct d3d12_descriptor_heap HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap);
+struct d3d12_desc_copy_location +{ + struct d3d12_desc src; + struct d3d12_desc *dst; +}; + +struct d3d12_desc_copy_info +{ + unsigned int count; + bool uav_counter; +}; + +void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, + struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, + struct d3d12_device *device); + /* ID3D12QueryHeap */ struct d3d12_query_heap {
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>
Improves performance in Control, which copies large numbers of descriptors per frame, of which often only ~10% differ from the descriptors already present at the destination.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 8 ++--- libs/vkd3d/device.c | 11 +++++-- libs/vkd3d/resource.c | 63 ++++++++++++++++++++++---------------- libs/vkd3d/vkd3d_private.h | 11 ++++++- 4 files changed, 60 insertions(+), 33 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 104a0c54..2d06d487 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -2655,7 +2655,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des unsigned int index, bool use_array) { uint32_t descriptor_range_magic = range->descriptor_magic; - const struct vkd3d_view *view = descriptor->u.view; + const struct vkd3d_view *view = descriptor->u.view_info.view; uint32_t vk_binding = range->binding; uint32_t set = range->set;
@@ -2793,7 +2793,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list && state->uav_counters.bindings[k].register_index == register_idx) { VkBufferView vk_counter_view = descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_UAV - ? descriptor->u.view->vk_counter_view : VK_NULL_HANDLE; + ? descriptor->u.view_info.view->vk_counter_view : VK_NULL_HANDLE; if (bindings->vk_uav_counter_views[k] != vk_counter_view) bindings->uav_counters_dirty = true; bindings->vk_uav_counter_views[k] = vk_counter_view; @@ -5252,7 +5252,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects);
resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view; + view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view_info.view; memcpy(colour.uint32, values, sizeof(colour.uint32));
if (view->format->type != VKD3D_FORMAT_TYPE_UINT) @@ -5311,7 +5311,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects);
resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view; + view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view_info.view; memcpy(colour.float32, values, sizeof(colour.float32));
d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index efa3cc94..a8237582 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -3596,12 +3596,12 @@ static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct location->src = *src;
if (location->src.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(location->src.u.view); + vkd3d_view_incref(location->src.u.view_info.view);
vkd3d_mutex_unlock(mutex);
infos[set].uav_counter |= (location->src.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) - & !!location->src.u.view->vk_counter_view; + & !!location->src.u.view_info.view->vk_counter_view; location->dst = dst;
if (infos[set].count == ARRAY_SIZE(locations[0])) @@ -3657,6 +3657,13 @@ static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device,
for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++) { + /* We don't need to lock either descriptor for the identity check. The descriptor + * mutex is only intended to prevent use-after-free of the vkd3d_view caused by a + * race condition in the calling app. It is unnecessary to protect this test as it's + * the app's race condition, not ours. */ + if (dst[dst_idx].magic == src[src_idx].magic && (dst[dst_idx].magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + && dst[dst_idx].u.view_info.written_serial_id == src[src_idx].u.view_info.view->serial_id) + continue; d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); }
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 7d679f02..cbbd4170 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -22,6 +22,8 @@ #define VKD3D_NULL_BUFFER_SIZE 16 #define VKD3D_NULL_VIEW_FORMAT DXGI_FORMAT_R8G8B8A8_UNORM
+LONG64 object_global_serial_id; + static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties) { if (properties->Type == D3D12_HEAP_TYPE_DEFAULT) @@ -2085,6 +2087,7 @@ static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) { view->refcount = 1; view->type = type; + view->serial_id = InterlockedIncrement64(&object_global_serial_id); view->vk_counter_view = VK_NULL_HANDLE; } return view; @@ -2149,7 +2152,7 @@ static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descrip { descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.u.view->u.vk_image_view; + descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.u.view_info.view->u.vk_image_view; } break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: @@ -2158,7 +2161,7 @@ static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descrip { descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.u.view->u.vk_buffer_view; + descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.u.view_info.view->u.vk_buffer_view; } break; case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -2166,7 +2169,7 @@ static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descrip { descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].sampler = 
locations[info_index].src.u.view->u.vk_sampler; + descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.u.view_info.view->u.vk_sampler; } break; default: @@ -2201,14 +2204,14 @@ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - descriptor_set->vk_image_infos[0].imageView = src->u.view->u.vk_image_view; + descriptor_set->vk_image_infos[0].imageView = src->u.view_info.view->u.vk_image_view; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view->u.vk_buffer_view; + descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view_info.view->u.vk_buffer_view; break; case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_image_infos[0].sampler = src->u.view->u.vk_sampler; + descriptor_set->vk_image_infos[0].sampler = src->u.view_info.view->u.vk_sampler; break; default: ERR("Unhandled descriptor type %#x.\n", src->vk_descriptor_type); @@ -2216,13 +2219,13 @@ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct } VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL));
- if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view->vk_counter_view) + if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view_info.view->vk_counter_view) { descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst - (const struct d3d12_desc *)descriptor_heap->descriptors; descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view->vk_counter_view; + descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view_info.view->vk_counter_view; VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); }
@@ -2237,14 +2240,14 @@ static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const str mutex = d3d12_device_get_descriptor_mutex(device, dst); vkd3d_mutex_lock(mutex);
- if (!(dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->u.view->refcount)) + if (!(dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->u.view_info.view->refcount)) { *dst = *src; vkd3d_mutex_unlock(mutex); return; }
- defunct_view = dst->u.view; + defunct_view = dst->u.view_info.view; *dst = *src; vkd3d_mutex_unlock(mutex);
@@ -2263,8 +2266,8 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr
/* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && !InterlockedDecrement(&dst->u.view->refcount)) - defunct_view = dst->u.view; + && !InterlockedDecrement(&dst->u.view_info.view->refcount)) + defunct_view = dst->u.view_info.view;
*dst = *src;
@@ -2321,9 +2324,9 @@ void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, c
for (i = 0, write_count = 0; i < info->count; ++i) { - if (!locations[i].src.u.view->vk_counter_view) + if (!locations[i].src.u.view_info.view->vk_counter_view) continue; - descriptor_set->vk_buffer_views[write_count] = locations[i].src.u.view->vk_counter_view; + descriptor_set->vk_buffer_views[write_count] = locations[i].src.u.view_info.view->vk_counter_view; descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - (const struct d3d12_desc *)descriptor_heap->descriptors; @@ -2349,7 +2352,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, vkd3d_mutex_lock(mutex);
if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(src->u.view); + vkd3d_view_incref(src->u.view_info.view);
tmp = *src;
@@ -2820,7 +2823,8 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, { descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->u.view = view; + descriptor->u.view_info.view = view; + descriptor->u.view_info.written_serial_id = view->serial_id; } return;
@@ -2856,7 +2860,8 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->u.view = view; + descriptor->u.view_info.view = view; + descriptor->u.view_info.written_serial_id = view->serial_id; }
static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, @@ -2886,7 +2891,8 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->u.view = view; + descriptor->u.view_info.view = view; + descriptor->u.view_info.written_serial_id = view->serial_id; }
static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, @@ -3016,7 +3022,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->u.view = view; + descriptor->u.view_info.view = view; + descriptor->u.view_info.written_serial_id = view->serial_id; }
static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) @@ -3053,7 +3060,8 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, { descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->u.view = view; + descriptor->u.view_info.view = view; + descriptor->u.view_info.written_serial_id = view->serial_id; } return;
@@ -3089,7 +3097,8 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->u.view = view; + descriptor->u.view_info.view = view; + descriptor->u.view_info.written_serial_id = view->serial_id; }
static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, @@ -3119,7 +3128,8 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->u.view = view; + descriptor->u.view_info.view = view; + descriptor->u.view_info.written_serial_id = view->serial_id;
if (counter_resource) { @@ -3194,7 +3204,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor,
descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->u.view = view; + descriptor->u.view_info.view = view; + descriptor->u.view_info.written_serial_id = view->serial_id; }
void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, @@ -3356,7 +3367,8 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler,
sampler->magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; sampler->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; - sampler->u.view = view; + sampler->u.view_info.view = view; + sampler->u.view_info.written_serial_id = view->serial_id; }
HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, @@ -3916,12 +3928,11 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) { - static LONG64 serial_id; HRESULT hr;
descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl; descriptor_heap->refcount = 1; - descriptor_heap->serial_id = InterlockedIncrement64(&serial_id); + descriptor_heap->serial_id = InterlockedIncrement64(&object_global_serial_id);
descriptor_heap->desc = *desc;
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index bdaa2a12..d7aba6a8 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -69,6 +69,8 @@ * this number to prevent excessive pool memory use. */ #define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u)
+extern LONG64 object_global_serial_id; + struct d3d12_command_list; struct d3d12_device; struct d3d12_resource; @@ -651,6 +653,7 @@ struct vkd3d_view { LONG refcount; enum vkd3d_view_type type; + uint64_t serial_id; union { VkBufferView vk_buffer_view; @@ -697,6 +700,12 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view);
+struct vkd3d_view_info +{ + uint64_t written_serial_id; + struct vkd3d_view *view; +}; + struct d3d12_desc { uint32_t magic; @@ -704,7 +713,7 @@ struct d3d12_desc union { VkDescriptorBufferInfo vk_cbv_info; - struct vkd3d_view *view; + struct vkd3d_view_info view_info; } u; };
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>