These can generate many messages per frame in some games, e.g. Control.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 1db624e0..37159a7f 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -3070,7 +3070,7 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF: if (list->index_buffer_format != DXGI_FORMAT_R16_UINT) { - FIXME("Strip cut value 0xffff is not supported with index buffer format %#x.\n", + FIXME_ONCE("Strip cut value 0xffff is not supported with index buffer format %#x.\n", list->index_buffer_format); } break; @@ -3078,7 +3078,7 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF: if (list->index_buffer_format != DXGI_FORMAT_R32_UINT) { - FIXME("Strip cut value 0xffffffff is not supported with index buffer format %#x.\n", + FIXME_ONCE("Strip cut value 0xffffffff is not supported with index buffer format %#x.\n", list->index_buffer_format); } break;
The performance improvement will be useful for Vulkan-backed heaps, where descriptor heaps must be found more often.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/device.c | 60 +++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 18 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 09bdb7a5..537bc7b2 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2184,6 +2184,7 @@ bool vkd3d_gpu_descriptor_allocator_register_range(struct vkd3d_gpu_descriptor_a const struct d3d12_desc *base, size_t count) { struct vkd3d_gpu_descriptor_allocation *allocation; + size_t i; int rc;
if ((rc = pthread_mutex_lock(&allocator->mutex))) @@ -2199,7 +2200,13 @@ bool vkd3d_gpu_descriptor_allocator_register_range(struct vkd3d_gpu_descriptor_a return false; }
- allocation = &allocator->allocations[allocator->allocation_count++]; + for (i = 0; i < allocator->allocation_count; ++i) + if (base < allocator->allocations[i].base) + break; + + allocation = &allocator->allocations[i]; + memmove(allocation + 1, allocation, (allocator->allocation_count++ - i) * sizeof(*allocation)); + allocation->base = base; allocation->count = count;
@@ -2226,8 +2233,8 @@ bool vkd3d_gpu_descriptor_allocator_unregister_range( if (allocator->allocations[i].base != base) continue;
- if (i != --allocator->allocation_count) - allocator->allocations[i] = allocator->allocations[allocator->allocation_count]; + memmove(&allocator->allocations[i], &allocator->allocations[i + 1], + (--allocator->allocation_count - i) * sizeof(allocator->allocations[0]));
found = true; break; @@ -2238,12 +2245,40 @@ bool vkd3d_gpu_descriptor_allocator_unregister_range( return found; }
+/* We could use bsearch() or recursion here, but it probably helps to omit + * all the extra function calls. */ +static struct vkd3d_gpu_descriptor_allocation *vkd3d_gpu_descriptor_allocator_binary_search( + const struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc) +{ + struct vkd3d_gpu_descriptor_allocation *allocations = allocator->allocations; + const struct d3d12_desc *base; + size_t centre, count; + + for (count = allocator->allocation_count; count > 1; ) + { + centre = count >> 1; + base = allocations[centre].base; + if (base <= desc) + { + allocations += centre; + count -= centre; + } + else + { + count = centre; + } + } + + return (desc >= allocations->base && desc < allocations->base + allocations->count) ? allocations : NULL; +} + + /* Return the available size from the specified descriptor to the heap end. */ size_t vkd3d_gpu_descriptor_allocator_range_size_from_descriptor( struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc) { struct vkd3d_gpu_descriptor_allocation *allocation; - size_t remaining, offset, i; + size_t remaining; int rc;
if ((rc = pthread_mutex_lock(&allocator->mutex))) @@ -2252,20 +2287,9 @@ size_t vkd3d_gpu_descriptor_allocator_range_size_from_descriptor( return 0; }
- for (i = 0, remaining = 0; i < allocator->allocation_count; ++i) - { - allocation = &allocator->allocations[i]; - - if (desc < allocation->base) - continue; - - offset = desc - allocation->base; - if (offset >= allocation->count) - continue; - - remaining = allocation->count - offset; - break; - } + remaining = 0; + if ((allocation = vkd3d_gpu_descriptor_allocator_binary_search(allocator, desc))) + remaining = allocation->count - (desc - allocation->base);
pthread_mutex_unlock(&allocator->mutex);
On Fri, 10 Dec 2021 at 06:07, Conor McCarthy cmccarthy@codeweavers.com wrote:
@@ -2199,7 +2200,13 @@ bool vkd3d_gpu_descriptor_allocator_register_range(struct vkd3d_gpu_descriptor_a return false; }
- allocation = &allocator->allocations[allocator->allocation_count++];
+    for (i = 0; i < allocator->allocation_count; ++i)
+        if (base < allocator->allocations[i].base)
+            break;
We could use a binary search here...
+    allocation = &allocator->allocations[i];
+    memmove(allocation + 1, allocation, (allocator->allocation_count++ - i) * sizeof(*allocation));
"&allocation[1]", arguably.
+/* We could use bsearch() or recursion here, but it probably helps to omit
+ * all the extra function calls. */
+static struct vkd3d_gpu_descriptor_allocation *vkd3d_gpu_descriptor_allocator_binary_search(
+        const struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc)
+{
The fact that this is using a binary search is really just an implementation detail; callers wouldn't care if this e.g. used an rbtree instead. Something like vkd3d_gpu_descriptor_allocator_find()/_search()/_get() seems more appropriate.
+    struct vkd3d_gpu_descriptor_allocation *allocations = allocator->allocations;
+    const struct d3d12_desc *base;
+    size_t centre, count;
+
+    for (count = allocator->allocation_count; count > 1; )
+    {
+        centre = count >> 1;
+        base = allocations[centre].base;
+        if (base <= desc)
+        {
+            allocations += centre;
+            count -= centre;
+        }
+        else
+        {
+            count = centre;
+        }
+    }
+
+    return (desc >= allocations->base && desc < allocations->base + allocations->count) ? allocations : NULL;
+}
"allocations->base + allocations->count" probably can't overflow because of other constraints, but by convention, we'd probably want that check to be "desc - allocations->base < allocations->count".
Saves a couple of conversion calls later, and more when Vulkan-backed heaps are added.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 6 +++--- libs/vkd3d/vkd3d_private.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 37159a7f..9fbde800 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -2605,7 +2605,7 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li
if (unbounded_offset != UINT_MAX /* Descriptors may not be set, eg. WoW. */ - && (base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[table_index]))) + && (base_descriptor = bindings->descriptor_tables[table_index])) { heap_size = vkd3d_gpu_descriptor_allocator_range_size_from_descriptor( &device->gpu_descriptor_allocator, base_descriptor); @@ -2981,7 +2981,7 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis { if (bindings->descriptor_table_dirty_mask & ((uint64_t)1 << i)) { - if ((base_descriptor = d3d12_desc_from_gpu_handle(bindings->descriptor_tables[i]))) + if ((base_descriptor = bindings->descriptor_tables[i])) d3d12_command_list_update_descriptor_table(list, bind_point, i, base_descriptor); else WARN("Descriptor table %u is not set.\n", i); @@ -4182,7 +4182,7 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l assert(root_signature_get_descriptor_table(root_signature, index));
assert(index < ARRAY_SIZE(bindings->descriptor_tables)); - bindings->descriptor_tables[index] = base_descriptor; + bindings->descriptor_tables[index] = d3d12_desc_from_gpu_handle(base_descriptor); bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; bindings->descriptor_table_active_mask |= (uint64_t)1 << index; } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 8af374a1..f5cac0cc 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -961,7 +961,7 @@ struct vkd3d_pipeline_bindings VkDescriptorSet descriptor_sets[VKD3D_MAX_DESCRIPTOR_SETS]; bool in_use;
- D3D12_GPU_DESCRIPTOR_HANDLE descriptor_tables[D3D12_MAX_ROOT_COST]; + struct d3d12_desc *descriptor_tables[D3D12_MAX_ROOT_COST]; uint64_t descriptor_table_dirty_mask; uint64_t descriptor_table_active_mask;
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
The availability of allocation info makes it possible to check that the descriptor belongs to a heap of the correct type. This will be more important when Vulkan-backed descriptor heaps are added.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 15 ++++++++++++++- libs/vkd3d/device.c | 20 ++++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 11 +++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 9fbde800..0090324f 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -4178,11 +4178,24 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; const struct d3d12_root_signature *root_signature = bindings->root_signature; + const struct d3d12_descriptor_heap *heap; + struct d3d12_desc *desc;
assert(root_signature_get_descriptor_table(root_signature, index));
assert(index < ARRAY_SIZE(bindings->descriptor_tables)); - bindings->descriptor_tables[index] = d3d12_desc_from_gpu_handle(base_descriptor); + desc = d3d12_desc_from_gpu_handle(base_descriptor); + + if (desc && !(heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator, + desc))) + { + /* Failure to find the heap means the descriptor handle is from the wrong heap type or not a handle at all. */ + ERR("Invalid heap for base descriptor %"PRIx64".\n", base_descriptor.ptr); + /* TODO: Mark list as invalid? */ + return; + } + + bindings->descriptor_tables[index] = desc; bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; bindings->descriptor_table_active_mask |= (uint64_t)1 << index; } diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 537bc7b2..fee6222d 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2272,6 +2272,26 @@ static struct vkd3d_gpu_descriptor_allocation *vkd3d_gpu_descriptor_allocator_bi return (desc >= allocations->base && desc < allocations->base + allocations->count) ? allocations : NULL; }
+struct vkd3d_gpu_descriptor_allocation *vkd3d_gpu_descriptor_allocator_allocation_from_descriptor( + struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc) +{ + struct vkd3d_gpu_descriptor_allocation *allocation; + int rc; + + assert(allocator->allocation_count); + + if ((rc = pthread_mutex_lock(&allocator->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return NULL; + } + + allocation = vkd3d_gpu_descriptor_allocator_binary_search(allocator, desc); + + pthread_mutex_unlock(&allocator->mutex); + + return allocation; +}
/* Return the available size from the specified descriptor to the heap end. */ size_t vkd3d_gpu_descriptor_allocator_range_size_from_descriptor( diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index f5cac0cc..305a003b 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -255,6 +255,8 @@ struct vkd3d_gpu_descriptor_allocator size_t allocation_count; };
+struct vkd3d_gpu_descriptor_allocation *vkd3d_gpu_descriptor_allocator_allocation_from_descriptor( + struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc); size_t vkd3d_gpu_descriptor_allocator_range_size_from_descriptor( struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc); bool vkd3d_gpu_descriptor_allocator_register_range(struct vkd3d_gpu_descriptor_allocator *allocator, @@ -636,6 +638,15 @@ struct d3d12_descriptor_heap HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap);
+static inline struct d3d12_descriptor_heap *vkd3d_gpu_descriptor_allocator_heap_from_descriptor( + struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc) +{ + struct vkd3d_gpu_descriptor_allocation *allocation + = vkd3d_gpu_descriptor_allocator_allocation_from_descriptor(allocator, desc); + return allocation ? CONTAINING_RECORD(allocation->base, struct d3d12_descriptor_heap, descriptors) + : NULL; +} + /* ID3D12QueryHeap */ struct d3d12_query_heap {
On Fri, 10 Dec 2021 at 06:07, Conor McCarthy cmccarthy@codeweavers.com wrote:
@@ -4178,11 +4178,24 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; const struct d3d12_root_signature *root_signature = bindings->root_signature;
+    const struct d3d12_descriptor_heap *heap;
+    struct d3d12_desc *desc;
assert(root_signature_get_descriptor_table(root_signature, index));
assert(index < ARRAY_SIZE(bindings->descriptor_tables));
- bindings->descriptor_tables[index] = d3d12_desc_from_gpu_handle(base_descriptor);
+    desc = d3d12_desc_from_gpu_handle(base_descriptor);
+
+    if (desc && !(heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator,
+            desc)))
+    {
+        /* Failure to find the heap means the descriptor handle is from the wrong heap type or not a handle at all. */
+        ERR("Invalid heap for base descriptor %"PRIx64".\n", base_descriptor.ptr);
+        /* TODO: Mark list as invalid? */
+        return;
+    }
I suppose we might use the "heap" pointer in future patches, but as far as this patch is concerned, we might as well just call vkd3d_gpu_descriptor_allocator_allocation_from_descriptor().
+struct vkd3d_gpu_descriptor_allocation *vkd3d_gpu_descriptor_allocator_allocation_from_descriptor(
+        struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc)
+{
+    struct vkd3d_gpu_descriptor_allocation *allocation;
+    int rc;
+
+    assert(allocator->allocation_count);
Does that assertion make sense? Since we're mostly using this to check for invalid descriptor handles above, I don't think we can assume a descriptor heap would necessarily have been created either.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 0090324f..f01d0488 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -4186,6 +4186,9 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l assert(index < ARRAY_SIZE(bindings->descriptor_tables)); desc = d3d12_desc_from_gpu_handle(base_descriptor);
+ if (bindings->descriptor_tables[index] == desc) + return; + if (desc && !(heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator, desc))) {
The existing implementation using virtual descriptor heaps, where Vk descriptor sets are created for the bindings in the root descriptor tables, is inefficient when multiple command lists are used with large descriptor heaps. This patch creates Vk sets for each D3D12 heap. Because D3D12 heaps can contain CBV, SRV and UAV descriptors in the same heap, multiple Vk sets are needed for each heap; however, the total number of populated descriptors is never more than (heap size + UAV counter count).
A new 'virtual_heaps' config option is introduced to make the old implementation available when needed. It's not always possible to determine if this is necessary when the device is created.
Up to nine Vk descriptor sets may be used. It's theoretically possible to reduce this to eight by placing immutable samplers in the push descriptor set layout, but contradictions in the Vulkan docs mean driver support is inconsistent. I've opened an issue for this: https://github.com/KhronosGroup/Vulkan-Docs/issues/1686
This patch also adds support for UAV counter descriptor arrays. It's not practical to add this in a separate patch due to complications with combining the old UAV counter implementation with the new descriptor heap implementation.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=47713 Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- README | 3 + libs/vkd3d/command.c | 219 ++++++++++++++++++++++++++- libs/vkd3d/device.c | 159 +++++++++++++++++++ libs/vkd3d/resource.c | 236 +++++++++++++++++++++++++++++ libs/vkd3d/state.c | 303 +++++++++++++++++++++++++++++++------ libs/vkd3d/vkd3d_private.h | 78 ++++++++++ tests/d3d12.c | 13 +- 7 files changed, 957 insertions(+), 54 deletions(-)
diff --git a/README b/README index fff5712f..936316af 100644 --- a/README +++ b/README @@ -54,6 +54,9 @@ commas or semicolons. * atomic_descriptor - Make all descriptor copy and write operations thread- safe. This is a workaround for race condition bugs which Windows tolerates but which will crash vkd3d. + * virtual_heaps - Create descriptors for each D3D12 root signature + descriptor range instead of entire descriptor heaps. Useful when push + constant or bound descriptor limits are exceeded. * vk_debug - enables Vulkan debug extensions.
* VKD3D_DEBUG - controls the debug level for log messages produced by diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index f01d0488..b922c31c 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1873,14 +1873,20 @@ static void d3d12_command_list_invalidate_current_render_pass(struct d3d12_comma static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *list, struct d3d12_pipeline_state *state) { - if (state && state->uav_counters.binding_count) + if (state) { enum vkd3d_pipeline_bind_point bind_point = (enum vkd3d_pipeline_bind_point)state->vk_bind_point; struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
- vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size, - state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views)); - bindings->uav_counters_dirty = true; + if (state->uav_counters.binding_count) + { + vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size, + state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views)); + bindings->uav_counters_dirty = true; + } + + bindings->cbv_srv_uav_heap_dirty = true; + bindings->sampler_heap_dirty = true; } }
@@ -2375,6 +2381,11 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets));
+ list->cbv_srv_uav_heap = NULL; + list->sampler_heap = NULL; + list->cbv_srv_uav_heap_id = 0; + list->sampler_heap_id = 0; + ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); }
@@ -3002,6 +3013,163 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis d3d12_command_list_update_uav_counter_descriptors(list, bind_point); }
+static bool vkd3d_bind_unbound_descriptor_heap(struct d3d12_descriptor_heap **dst, + struct d3d12_descriptor_heap *src, struct d3d12_command_list *list) +{ + const char *heap_type = src->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ? "CBV/SRV/UAV" : "sampler"; + + if (!*dst) + { + WARN("Binding %s heap %p which was not set with SetDescriptorHeaps().\n", heap_type, src); + *dst = src; + return true; + } + else + { + ERR("Descriptors from an unbound %s heap %p are used by list %p.\n", heap_type, src, list); + return false; + } +} + +/* It's possible for a new descriptor heap to have the same pointer value as a previously + * destroyed heap because the allocator can place it in the same location. */ +static inline bool d3d12_command_list_descriptor_heap_is_set(const struct d3d12_command_list *list, + const struct d3d12_descriptor_heap *heap) +{ + return (heap == list->cbv_srv_uav_heap && heap->serial_id == list->cbv_srv_uav_heap_id) + || (heap == list->sampler_heap && heap->serial_id == list->sampler_heap_id); +} + +static unsigned int d3d12_command_list_bind_descriptor_table(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings, unsigned int index) +{ + struct d3d12_descriptor_heap *heap; + const struct d3d12_desc *desc; + unsigned int offset; + + if (!(desc = bindings->descriptor_tables[index])) + return 0; + + /* No NULL check is needed here because it's checked when descriptor tables are set. */ + heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator, desc); + offset = desc - (const struct d3d12_desc *)heap->descriptors; + + if (!(bindings->descriptor_table_dirty_mask & ((uint64_t)1 << index)) + || d3d12_command_list_descriptor_heap_is_set(list, heap)) + { + return offset; + } + + /* AMD, Nvidia and Intel drivers on Windows work if SetDescriptorHeaps() + * is not called. Bind any unbound heaps. 
*/ + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + bindings->cbv_srv_uav_heap_dirty |= vkd3d_bind_unbound_descriptor_heap(&list->cbv_srv_uav_heap, + heap, list); + } + else + { + bindings->sampler_heap_dirty |= vkd3d_bind_unbound_descriptor_heap(&list->sampler_heap, heap, list); + } + + return offset; +} + +static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + unsigned int offsets[D3D12_MAX_ROOT_COST]; + unsigned int i, j; + + for (i = 0, j = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i) + { + if (rs->descriptor_table_mask & ((uint64_t)1 << i)) + offsets[j++] = d3d12_command_list_bind_descriptor_table(list, bindings, i); + } + if (j) + { + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, rs->vk_pipeline_layout, VK_SHADER_STAGE_ALL, + rs->descriptor_table_offset, j * sizeof(uint32_t), offsets)); + } +} + +static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + unsigned int i; + + if (!heap) + return; + + for (i = 0; i < ARRAY_SIZE(heap->vk_descriptor_sets); ++i) + { + VkDescriptorSet vk_descriptor_set = heap->vk_descriptor_sets[i].vk_set; + unsigned int set_flag; + + if (!vk_descriptor_set) + continue; + + /* Descriptor sets are created while descriptors are being populated, so all + * current sets didn't necessarily exist on the previous call to this function. 
*/ + set_flag = 1 << i; + if (bindings->bound_descriptor_set_flags & set_flag) + continue; + + bindings->bound_descriptor_set_flags |= set_flag; + + /* These sets can be shared across multiple command lists, and therefore binding must + * be synchronised. On an experimental branch in which caching of Vk descriptor writes + * greatly increased the chance of multiple threads arriving here at the same time, + * GRID 2019 crashed without the mutex lock. */ + pthread_mutex_lock(&heap->vk_descriptor_sets[i].mutex); + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout, + rs->vk_set_count + i, 1, &vk_descriptor_set, 0, NULL)); + pthread_mutex_unlock(&heap->vk_descriptor_sets[i].mutex); + } +} + +static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + const struct d3d12_device *device = list->device; + + if (!rs) + return; + + if (bindings->descriptor_table_dirty_mask || bindings->push_descriptor_dirty_mask) + d3d12_command_list_prepare_descriptors(list, bind_point); + if (bindings->descriptor_table_dirty_mask) + d3d12_command_list_update_descriptor_tables(list, bindings); + bindings->descriptor_table_dirty_mask = 0; + + d3d12_command_list_update_push_descriptors(list, bind_point); + + if (bindings->descriptor_set_count) + { + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout, + rs->main_set, bindings->descriptor_set_count, bindings->descriptor_sets, 0, NULL)); + bindings->in_use = true; + } + + if (bindings->cbv_srv_uav_heap_dirty) + bindings->bound_descriptor_set_flags &= device->sampler_vk_set_flags; + if (bindings->sampler_heap_dirty) + 
bindings->bound_descriptor_set_flags &= device->cbv_srv_uav_vk_set_flags; + + d3d12_command_list_bind_descriptor_heap(list, bind_point, list->cbv_srv_uav_heap); + d3d12_command_list_bind_descriptor_heap(list, bind_point, list->sampler_heap); + bindings->cbv_srv_uav_heap_dirty = false; + bindings->sampler_heap_dirty = false; +} + static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list); @@ -3009,7 +3177,10 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l if (!d3d12_command_list_update_compute_pipeline(list)) return false;
- d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + if (list->device->use_vk_heaps) + d3d12_command_list_update_heap_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + else + d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE);
return true; } @@ -3026,7 +3197,10 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list if (!d3d12_command_list_update_current_framebuffer(list)) return false;
- d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); + if (list->device->use_vk_heaps) + d3d12_command_list_update_heap_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); + else + d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS);
if (list->current_render_pass != VK_NULL_HANDLE) return true; @@ -4130,12 +4304,43 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCom static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, UINT heap_count, ID3D12DescriptorHeap *const *heaps) { + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct vkd3d_pipeline_bindings *bindings; + struct d3d12_descriptor_heap *heap; + unsigned int i; + TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps);
- /* Our current implementation does not need this method. + /* Our current virtual descriptor heaps implementation does not need this method. * * It could be used to validate descriptor tables but we do not have an * equivalent of the D3D12 Debug Layer. */ + if (!list->device->use_vk_heaps) + return; + + bindings = list->state ? &list->pipeline_bindings[list->state->vk_bind_point] : NULL; + + for (i = 0; i < heap_count; ++i) + { + if (!(heap = unsafe_impl_from_ID3D12DescriptorHeap(heaps[i])) + || d3d12_command_list_descriptor_heap_is_set(list, heap)) + continue; + + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + list->cbv_srv_uav_heap = heap; + list->cbv_srv_uav_heap_id = heap->serial_id; + if (bindings) + bindings->cbv_srv_uav_heap_dirty = true; + } + else + { + list->sampler_heap = heap; + list->sampler_heap_id = heap->serial_id; + if (bindings) + bindings->sampler_heap_dirty = true; + } + } }
static void d3d12_command_list_set_root_signature(struct d3d12_command_list *list, diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index fee6222d..e47ccb4e 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -140,6 +140,118 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_VERTEX_ATTRIBUTE_DIVISOR, EXT_vertex_attribute_divisor), };
+static struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] = +{ + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, + /* UAV counters */ + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, +}; + +static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *device, unsigned int index) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags_info; + VkDescriptorSetLayoutCreateInfo set_desc; + VkDescriptorBindingFlagsEXT set_flags; + VkDescriptorSetLayoutBinding binding; + VkResult vr; + + binding.binding = 0; + binding.descriptorType = device->vk_descriptor_heap_layouts[index].type; + binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count; + binding.stageFlags = VK_SHADER_STAGE_ALL; + binding.pImmutableSamplers = NULL; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + set_desc.pNext = &flags_info; + set_desc.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT; + set_desc.bindingCount = 1; + set_desc.pBindings = &binding; + + set_flags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT + | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT + | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT; + + flags_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + flags_info.pNext = NULL; + flags_info.bindingCount = 1; 
+ flags_info.pBindingFlags = &set_flags; + + if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_desc, NULL, + &device->vk_descriptor_heap_layouts[index].vk_set_layout))) < 0) + { + WARN("Failed to create Vulkan descriptor set layout, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device) +{ + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; + unsigned int i; + HRESULT hr; + + device->cbv_srv_uav_vk_set_flags = 0; + device->sampler_vk_set_flags = 0; + + for (i = 0; i < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++i) + device->vk_descriptor_heap_layouts[i] = vk_descriptor_heap_layouts[i]; + + if (!device->use_vk_heaps) + return S_OK; + + for (i = 0; i < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++i) + { + if (vk_descriptor_heap_layouts[i].applicable_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + device->cbv_srv_uav_vk_set_flags |= 1 << i; + else + device->sampler_vk_set_flags |= 1 << i; + + switch (device->vk_descriptor_heap_layouts[i].type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + device->vk_descriptor_heap_layouts[i].count = limits->uniform_buffer_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + device->vk_descriptor_heap_layouts[i].count = limits->sampled_image_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + device->vk_descriptor_heap_layouts[i].count = limits->storage_image_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + device->vk_descriptor_heap_layouts[i].count = limits->sampler_max_descriptors; + break; + default: + ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[i].type); + break; + } + + if (FAILED(hr = vkd3d_create_vk_descriptor_heap_layout(device, i))) + return hr; + } + + return S_OK; +} + 
+static void vkd3d_vk_descriptor_heap_layouts_cleanup(struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++i) + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, device->vk_descriptor_heap_layouts[i].vk_set_layout, + NULL)); +} + static unsigned int get_spec_version(const VkExtensionProperties *extensions, unsigned int count, const char *extension_name) { @@ -430,6 +542,7 @@ static void vkd3d_init_debug_report(struct vkd3d_instance *instance) static const struct vkd3d_debug_option vkd3d_config_options[] = { {"atomic_descriptor", VKD3D_CONFIG_FLAG_ATOMIC_DESC_OPS}, /* atomic descriptor read/write */ + {"virtual_heaps", VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS}, /* always use virtual descriptor heaps */ {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */ };
@@ -1267,6 +1380,36 @@ static void vkd3d_init_feature_level(struct vkd3d_vulkan_info *vk_info, TRACE("Max feature level: %#x.\n", vk_info->max_feature_level); }
+static void vkd3d_device_descriptor_limits_init(struct vkd3d_device_descriptor_limits *limits, + const VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties) +{ + const unsigned int root_provision = D3D12_MAX_ROOT_COST / 2; + unsigned int srv_divisor = 1, uav_divisor = 1; + + /* The total number of populated sampled image or storage image descriptors never exceeds the size of + * one set (or two sets if every UAV has a counter), but the total size of bound layouts will exceed + * device limits if each set size is maxDescriptorSet*, because of the D3D12 buffer + image allowance + * (and UAV counters). Breaking limits for layouts seems to work with RADV and Nvidia drivers at + * least, but let's try to stay within them if limits are high enough. */ + if (properties->maxDescriptorSetUpdateAfterBindSampledImages >= (1u << 21)) + { + srv_divisor = 2; + uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 3 : 2; + } + + limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, + properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision); + limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages, + properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision); + limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, + properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision); + limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages, + properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision); + limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers, + properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision); + limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, 
VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); +} + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info, struct vkd3d_physical_device_info *physical_device_info, @@ -1299,6 +1442,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
vulkan_info->device_limits = physical_device_info->properties2.properties.limits; vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; + vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->descriptor_indexing_properties); vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1); @@ -1494,6 +1639,14 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, features->robustBufferAccess = VK_FALSE; }
+ /* Select descriptor heap implementation. Forcing virtual heaps may be useful if + * a client allocates descriptor heaps too large for the Vulkan device, or the + * root signature cost exceeds the available push constant size. Virtual heaps + * use only enough descriptors for the descriptor tables of the currently bound + * root signature, and don't require a 32-bit push constant for each table. */ + device->use_vk_heaps = vulkan_info->EXT_descriptor_indexing + && !(device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS); + return S_OK; }
@@ -2387,6 +2540,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_private_store_destroy(&device->private_store);
vkd3d_cleanup_format_info(device); + vkd3d_vk_descriptor_heap_layouts_cleanup(device); vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); vkd3d_destroy_null_resources(&device->null_resources, device); vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator); @@ -3895,6 +4049,9 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_uav_clear_state_init(&device->uav_clear_state, device))) goto out_destroy_null_resources;
+ if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device))) + goto out_destroy_vk_heap_layouts; + vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); @@ -3917,6 +4074,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
return S_OK;
+out_destroy_vk_heap_layouts: + vkd3d_vk_descriptor_heap_layouts_cleanup(device); out_destroy_null_resources: vkd3d_destroy_null_resources(&device->null_resources, device); out_cleanup_format_info: diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 80f3977e..9f99dd2c 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2118,6 +2118,102 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); }
+static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, unsigned int set) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + uint32_t variable_binding_size = descriptor_heap->desc.NumDescriptors; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; + VkDescriptorSetAllocateInfo set_desc; + VkResult vr; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_desc.pNext = NULL; + set_desc.descriptorPool = descriptor_heap->vk_descriptor_pool; + set_desc.descriptorSetCount = 1; + set_desc.pSetLayouts = &device->vk_descriptor_heap_layouts[set].vk_set_layout; + set_desc.pNext = &set_size; + set_size.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT; + set_size.pNext = NULL; + set_size.descriptorSetCount = 1; + set_size.pDescriptorCounts = &variable_binding_size; + if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) + { + descriptor_set->vk_descriptor_write.dstSet = descriptor_set->vk_set; + return S_OK; + } + + ERR("Failed to allocate descriptor set, vr %d.\n", vr); + return hresult_from_vk_result(vr); +} + +static void d3d12_descriptor_heap_write_buffer(struct d3d12_descriptor_heap_vk_set *descriptor_set, + const struct d3d12_desc *src) +{ + descriptor_set->vk_descriptor_write.pBufferInfo = &src->u.vk_cbv_info; +} + +static void d3d12_descriptor_heap_write_texel_buffer(struct d3d12_descriptor_heap_vk_set *descriptor_set, + const struct d3d12_desc *src) +{ + descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->u.vk_buffer_view; +} + +static void d3d12_descriptor_heap_write_uav_counter_texel_buffer(struct d3d12_descriptor_heap_vk_set *descriptor_set, + const struct d3d12_desc *src) +{ + descriptor_set->vk_descriptor_write.pTexelBufferView 
= &src->u.view->vk_counter_view; +} + +static void d3d12_descriptor_heap_write_image(struct d3d12_descriptor_heap_vk_set *descriptor_set, + const struct d3d12_desc *src) +{ + descriptor_set->vk_image_info.imageView = src->u.view->u.vk_image_view; +} + +static void d3d12_descriptor_heap_write_sampler(struct d3d12_descriptor_heap_vk_set *descriptor_set, + const struct d3d12_desc *src) +{ + descriptor_set->vk_image_info.sampler = src->u.view->u.vk_sampler; +} + +/* dst and src contain the same data unless another thread overwrites dst. The array index is + * calculated from dst, and src is thread safe. */ +static void d3d12_descriptor_heap_write_vk_descriptor(struct d3d12_descriptor_heap *descriptor_heap, + enum vkd3d_vk_descriptor_set_index set, const struct d3d12_desc *dst, const struct d3d12_desc *src) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + const struct d3d12_device *device = descriptor_heap->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + pthread_mutex_lock(&descriptor_set->mutex); + + if (descriptor_set->vk_set || SUCCEEDED(d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, + descriptor_heap->device, set))) + { + descriptor_set->vk_descriptor_write.dstArrayElement = dst + - (const struct d3d12_desc *)descriptor_heap->descriptors; + descriptor_set->write_descriptor(descriptor_set, src); + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + } + + pthread_mutex_unlock(&descriptor_set->mutex); +} + +static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + struct d3d12_descriptor_heap *descriptor_heap; + + descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, dst); + + d3d12_descriptor_heap_write_vk_descriptor(descriptor_heap, vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + 
src->vk_descriptor_type), dst, src); + if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view->vk_counter_view) + d3d12_descriptor_heap_write_vk_descriptor(descriptor_heap, VKD3D_SET_INDEX_UAV_COUNTER, dst, src); +} + void d3d12_desc_write(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -2127,6 +2223,9 @@ void d3d12_desc_write(struct d3d12_desc *dst, const struct d3d12_desc *src, vkd3d_view_destroy(dst->u.view, device);
*dst = *src; + + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, src, device); }
void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, @@ -2150,6 +2249,9 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr /* Destroy the view after unlocking to reduce wait time. */ if (defunct_view) vkd3d_view_destroy(defunct_view, device); + + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, src, device); }
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) @@ -3387,6 +3489,17 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev }
/* ID3D12DescriptorHeap */ + +static const struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl; + +struct d3d12_descriptor_heap *unsafe_impl_from_ID3D12DescriptorHeap(ID3D12DescriptorHeap *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_descriptor_heap_vtbl); + return CONTAINING_RECORD(iface, struct d3d12_descriptor_heap, ID3D12DescriptorHeap_iface); +} + static inline struct d3d12_descriptor_heap *impl_from_ID3D12DescriptorHeap(ID3D12DescriptorHeap *iface) { return CONTAINING_RECORD(iface, struct d3d12_descriptor_heap, ID3D12DescriptorHeap_iface); @@ -3433,9 +3546,12 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea
if (!refcount) { + const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_device *device = heap->device; unsigned int i;
+ vk_procs = &device->vk_procs; + vkd3d_private_store_destroy(&heap->private_store);
switch (heap->desc.Type) @@ -3482,6 +3598,12 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea break; }
+ VK_CALL(vkDestroyDescriptorPool(device->vk_device, heap->vk_descriptor_pool, NULL)); + for (i = 0; i < ARRAY_SIZE(heap->vk_descriptor_sets); ++i) + { + pthread_mutex_destroy(&heap->vk_descriptor_sets[i].mutex); + } + vkd3d_free(heap);
d3d12_device_release(device); @@ -3592,19 +3714,133 @@ static const struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl = d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart, };
+const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[] = +{ + VKD3D_SET_INDEX_SAMPLER, + VKD3D_SET_INDEX_COUNT, + VKD3D_SET_INDEX_SAMPLED_IMAGE, + VKD3D_SET_INDEX_STORAGE_IMAGE, + VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER, + VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER, + VKD3D_SET_INDEX_UNIFORM_BUFFER, +}; + +static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorPoolSize pool_sizes[VKD3D_SET_INDEX_COUNT]; + struct VkDescriptorPoolCreateInfo pool_desc; + VkDevice vk_device = device->vk_device; + unsigned int i; + VkResult vr; + + for (i = 0, pool_desc.poolSizeCount = 0; i < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++i) + { + if (device->vk_descriptor_heap_layouts[i].applicable_heap_type == desc->Type) + { + pool_sizes[pool_desc.poolSizeCount].type = device->vk_descriptor_heap_layouts[i].type; + pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors; + } + } + + pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_desc.pNext = NULL; + pool_desc.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT; + pool_desc.maxSets = pool_desc.poolSizeCount; + pool_desc.pPoolSizes = pool_sizes; + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &descriptor_heap->vk_descriptor_pool))) < 0) + ERR("Failed to create descriptor pool, vr %d.\n", vr); + + return hresult_from_vk_result(vr); +} + +static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + enum vkd3d_vk_descriptor_set_index set; + HRESULT hr; + + descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; + memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); + + if 
(!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) + return S_OK; + + if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_pool(descriptor_heap, device, desc))) + return hr; + + for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) + { + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + + pthread_mutex_init(&descriptor_set->mutex, NULL); + + descriptor_set->vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set->vk_descriptor_write.pNext = NULL; + descriptor_set->vk_descriptor_write.dstBinding = 0; + descriptor_set->vk_descriptor_write.descriptorCount = 1; + descriptor_set->vk_descriptor_write.descriptorType = device->vk_descriptor_heap_layouts[set].type; + descriptor_set->vk_descriptor_write.pImageInfo = NULL; + descriptor_set->vk_descriptor_write.pBufferInfo = NULL; + descriptor_set->vk_descriptor_write.pTexelBufferView = NULL; + + switch(device->vk_descriptor_heap_layouts[set].type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + descriptor_set->write_descriptor = d3d12_descriptor_heap_write_buffer; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + descriptor_set->write_descriptor = (set == VKD3D_SET_INDEX_UAV_COUNTER) + ? 
d3d12_descriptor_heap_write_uav_counter_texel_buffer + : d3d12_descriptor_heap_write_texel_buffer; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + descriptor_set->write_descriptor = d3d12_descriptor_heap_write_image; + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + descriptor_set->write_descriptor = d3d12_descriptor_heap_write_image; + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + descriptor_set->write_descriptor = d3d12_descriptor_heap_write_sampler; + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.imageView = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + break; + default: + FIXME("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); + return E_FAIL; + } + } + + return S_OK; +} + static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) { + static LONG volatile serial_id; HRESULT hr;
descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl; descriptor_heap->refcount = 1; + descriptor_heap->serial_id = InterlockedIncrement(&serial_id);
descriptor_heap->desc = *desc;
if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) return hr;
+ d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + d3d12_device_add_ref(descriptor_heap->device = device);
return S_OK; diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 10503a60..8656432a 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -91,7 +91,9 @@ static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signa
if (root_signature->descriptor_mapping) vkd3d_free(root_signature->descriptor_mapping); + vkd3d_free(root_signature->uav_counter_mapping); vkd3d_free(root_signature->descriptor_offsets); + vkd3d_free(root_signature->uav_counter_offsets); if (root_signature->root_constants) vkd3d_free(root_signature->root_constants);
@@ -327,6 +329,7 @@ static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutB struct d3d12_root_signature_info { size_t binding_count; + size_t uav_range_count;
size_t root_constant_count; size_t root_descriptor_count; @@ -382,6 +385,7 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: /* As above. */ info->binding_count += binding_count; + ++info->uav_range_count; break; case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: break; @@ -463,6 +467,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat uint32_t *push_constant_range_count) { uint32_t push_constants_offset[D3D12_SHADER_VISIBILITY_PIXEL + 1]; + bool use_vk_heaps = root_signature->device->use_vk_heaps; unsigned int i, j, push_constant_count; uint32_t offset;
@@ -475,7 +480,8 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat continue;
assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); - push_constants[p->ShaderVisibility].stageFlags = stage_flags_from_visibility(p->ShaderVisibility); + push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL + : stage_flags_from_visibility(p->ShaderVisibility); push_constants[p->ShaderVisibility].size += p->u.Constants.Num32BitValues * sizeof(uint32_t); } if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size) @@ -554,6 +560,8 @@ struct vkd3d_descriptor_set_context unsigned int table_index; unsigned int unbounded_offset; unsigned int descriptor_index; + unsigned int uav_counter_index; + unsigned int push_constant_index; uint32_t descriptor_binding; };
@@ -563,6 +571,7 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns
if (set_count > max_count) { + /* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */ ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); return false; } @@ -740,6 +749,122 @@ static void d3d12_root_signature_map_vk_unbounded_binding(struct d3d12_root_sign offset->dynamic_offset_index = ~0u; }
+static unsigned int vk_heap_binding_count_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + unsigned int descriptor_set_size) +{ + unsigned int max_count; + + if (descriptor_set_size <= range->offset) + { + ERR("Descriptor range offset %u exceeds maximum available offset %u.\n", range->offset, descriptor_set_size - 1); + max_count = 0; + } + else + { + max_count = descriptor_set_size - range->offset; + } + + if (range->descriptor_count != UINT_MAX) + { + if (range->descriptor_count > max_count) + ERR("Range size %u exceeds available descriptor count %u.\n", range->descriptor_count, max_count); + return range->descriptor_count; + } + else + { + /* Prefer an unsupported binding count vs a zero count, because shader compilation will fail + * to match a declaration to a zero binding, resulting in failure of pipeline state creation. */ + return max_count + !max_count; + } +} + +static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + bool is_buffer, const struct d3d12_root_signature *root_signature, + struct vkd3d_shader_descriptor_binding *binding) +{ + const struct vkd3d_device_descriptor_limits *descriptor_limits = &root_signature->device->vk_info.descriptor_limits; + unsigned int descriptor_set_size; + + switch (range->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + binding->set = is_buffer ? VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER : VKD3D_SET_INDEX_SAMPLED_IMAGE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + binding->set = is_buffer ? 
VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER : VKD3D_SET_INDEX_STORAGE_IMAGE; + descriptor_set_size = descriptor_limits->storage_image_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + binding->set = VKD3D_SET_INDEX_UNIFORM_BUFFER; + descriptor_set_size = descriptor_limits->uniform_buffer_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + binding->set = VKD3D_SET_INDEX_SAMPLER; + descriptor_set_size = descriptor_limits->sampler_max_descriptors; + break; + default: + FIXME("Unhandled descriptor range type type %#x.\n", range->type); + binding->set = VKD3D_SET_INDEX_SAMPLED_IMAGE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + break; + } + binding->set += root_signature->vk_set_count; + binding->binding = 0; + binding->count = vk_heap_binding_count_from_descriptor_range(range, descriptor_set_size); +} + +static void d3d12_root_signature_map_vk_heap_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, bool buffer_descriptor, + enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_resource_binding *mapping = &root_signature->descriptor_mapping[context->descriptor_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->descriptor_offsets[context->descriptor_index++]; + + mapping->type = range->type; + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + vkd3d_descriptor_heap_binding_from_descriptor_range(range, buffer_descriptor, root_signature, &mapping->binding); + offset->static_offset = range->offset; + offset->dynamic_offset_index = context->push_constant_index; +} + +static void d3d12_root_signature_map_vk_heap_uav_counter(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility, + struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_uav_counter_binding *mapping = &root_signature->uav_counter_mapping[context->uav_counter_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->uav_counter_offsets[context->uav_counter_index++]; + + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->binding.set = root_signature->vk_set_count + VKD3D_SET_INDEX_UAV_COUNTER; + mapping->binding.binding = 0; + mapping->binding.count = vk_heap_binding_count_from_descriptor_range(range, + root_signature->device->vk_info.descriptor_limits.storage_image_max_descriptors); + offset->static_offset = range->offset; + offset->dynamic_offset_index = context->push_constant_index; +} + +static void d3d12_root_signature_map_descriptor_heap_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility, + struct vkd3d_descriptor_set_context *context) +{ + bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + { + d3d12_root_signature_map_vk_heap_binding(root_signature, range, true, shader_visibility, context); + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + d3d12_root_signature_map_vk_heap_uav_counter(root_signature, 
range, shader_visibility, context); + } + + d3d12_root_signature_map_vk_heap_binding(root_signature, range, is_buffer, shader_visibility, context); +} + static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_root_signature *root_signature, const struct d3d12_root_descriptor_table_range *range, unsigned int descriptor_offset, enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) @@ -804,6 +929,7 @@ static HRESULT validate_descriptor_register_ranges(const struct d3d12_root_descr static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { + bool use_vk_heaps = root_signature->device->use_vk_heaps; struct d3d12_root_descriptor_table *table; unsigned int i, j, k, range_count; uint32_t vk_binding; @@ -871,6 +997,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
range = &table->ranges[j];
+ if (use_vk_heaps) + { + /* set, binding and vk_binding_count are not used. */ + range->set = 0; + range->binding = 0; + range->vk_binding_count = 0; + d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context); + continue; + } + range->set = root_signature->vk_set_count - root_signature->main_set;
if (root_signature->use_descriptor_arrays) @@ -949,6 +1085,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
context->current_binding = cur_binding; } + ++context->push_constant_index; }
return S_OK; @@ -1022,6 +1159,30 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa return S_OK; }
+static void d3d12_root_signature_init_descriptor_table_push_constants(struct d3d12_root_signature *root_signature, + const struct vkd3d_descriptor_set_context *context) +{ + root_signature->descriptor_table_offset = 0; + if ((root_signature->descriptor_table_count = context->push_constant_index)) + { + VkPushConstantRange *range = &root_signature->push_constant_ranges[D3D12_SHADER_VISIBILITY_ALL]; + + root_signature->descriptor_table_offset = (range->size + 15) & ~15; + range->size = root_signature->descriptor_table_offset + + root_signature->descriptor_table_count * sizeof(uint32_t); + + if (range->size > root_signature->device->vk_info.device_limits.maxPushConstantsSize) + FIXME("Push constants size %u exceeds maximum allowed size %u. Try VKD3D_CONFIG=virtual_heaps.\n", + range->size, root_signature->device->vk_info.device_limits.maxPushConstantsSize); + + if (!root_signature->push_constant_range_count) + { + root_signature->push_constant_range_count = 1; + range->stageFlags = VK_SHADER_STAGE_ALL; + } + } +} + static bool vk_binding_uses_partial_binding(const VkDescriptorSetLayoutBinding *binding) { if (binding->descriptorCount == 1) @@ -1134,7 +1295,8 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa struct vkd3d_descriptor_set_context context; VkDescriptorSetLayoutBinding *binding_desc; struct d3d12_root_signature_info info; - unsigned int i; + bool use_vk_heaps; + unsigned int i, j; HRESULT hr;
memset(&context, 0, sizeof(context)); @@ -1149,7 +1311,9 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa root_signature->parameters = NULL; root_signature->flags = desc->Flags; root_signature->descriptor_mapping = NULL; + root_signature->uav_counter_mapping = NULL; root_signature->descriptor_offsets = NULL; + root_signature->uav_counter_offsets = NULL; root_signature->static_sampler_count = 0; root_signature->static_samplers = NULL; root_signature->device = device; @@ -1167,10 +1331,13 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa }
root_signature->binding_count = info.binding_count; + root_signature->uav_mapping_count = info.uav_range_count; root_signature->static_sampler_count = desc->NumStaticSamplers; root_signature->root_descriptor_count = info.root_descriptor_count; root_signature->use_descriptor_arrays = device->vk_info.EXT_descriptor_indexing;
+ use_vk_heaps = device->use_vk_heaps; + hr = E_OUTOFMEMORY; root_signature->parameter_count = desc->NumParameters; if (!(root_signature->parameters = vkd3d_calloc(root_signature->parameter_count, @@ -1179,9 +1346,15 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa if (!(root_signature->descriptor_mapping = vkd3d_calloc(root_signature->binding_count, sizeof(*root_signature->descriptor_mapping)))) goto fail; + if (use_vk_heaps && !(root_signature->uav_counter_mapping = vkd3d_calloc(root_signature->uav_mapping_count, + sizeof(*root_signature->uav_counter_mapping)))) + goto fail; if (root_signature->use_descriptor_arrays && !(root_signature->descriptor_offsets = vkd3d_calloc( root_signature->binding_count, sizeof(*root_signature->descriptor_offsets)))) goto fail; + if (use_vk_heaps && !(root_signature->uav_counter_offsets = vkd3d_calloc(root_signature->uav_mapping_count, + sizeof(*root_signature->uav_counter_offsets)))) + goto fail; root_signature->root_constant_count = info.root_constant_count; if (!(root_signature->root_constants = vkd3d_calloc(root_signature->root_constant_count, sizeof(*root_signature->root_constants)))) @@ -1213,8 +1386,13 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa goto fail; if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, device, desc, &context))) goto fail; + if (use_vk_heaps && FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) + goto fail; + context.push_constant_index = 0; if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &context))) goto fail; + if (use_vk_heaps) + d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context);
if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) goto fail; @@ -1227,7 +1405,13 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa vk_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout; }
- if (FAILED(hr = vkd3d_create_pipeline_layout(device, root_signature->vk_set_count, + if (use_vk_heaps) + { + for (j = 0; j < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++j) + vk_layouts[i++] = device->vk_descriptor_heap_layouts[j].vk_set_layout; + } + + if (FAILED(hr = vkd3d_create_pipeline_layout(device, i, vk_layouts, root_signature->push_constant_range_count, root_signature->push_constant_ranges, &root_signature->vk_pipeline_layout))) goto fail; @@ -1835,6 +2019,11 @@ static HRESULT d3d12_pipeline_state_init_uav_counters(struct d3d12_pipeline_stat descriptor_binding = 0; for (set_index = 0; set_index < root_signature->vk_set_count; ++set_index) set_layouts[set_index] = root_signature->descriptor_set_layouts[set_index].vk_layout; + if (device->use_vk_heaps) + { + for (i = 0; i < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++i) + set_layouts[set_index++] = device->vk_descriptor_heap_layouts[i].vk_set_layout; + }
for (i = 0, j = 0; i < shader_info->descriptor_count; ++i) { @@ -1913,21 +2102,24 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st return E_INVALIDARG; }
- shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; - shader_info.next = NULL; - if ((ret = vkd3d_scan_dxbc(&desc->CS, &shader_info)) < 0) + if (!device->use_vk_heaps) { - WARN("Failed to scan shader bytecode, vkd3d result %d.\n", ret); - return hresult_from_vkd3d_result(ret); - } + shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; + shader_info.next = NULL; + if ((ret = vkd3d_scan_dxbc(&desc->CS, &shader_info)) < 0) + { + WARN("Failed to scan shader bytecode, vkd3d result %d.\n", ret); + return hresult_from_vkd3d_result(ret); + }
- if (FAILED(hr = d3d12_pipeline_state_init_uav_counters(state, - device, root_signature, &shader_info, VK_SHADER_STAGE_COMPUTE_BIT))) - { - WARN("Failed to create descriptor set layout for UAV counters, hr %#x.\n", hr); - return hr; + if (FAILED(hr = d3d12_pipeline_state_init_uav_counters(state, + device, root_signature, &shader_info, VK_SHADER_STAGE_COMPUTE_BIT))) + { + WARN("Failed to create descriptor set layout for UAV counters, hr %#x.\n", hr); + return hr; + } + vkd3d_shader_free_scan_descriptor_info(&shader_info); } - vkd3d_shader_free_scan_descriptor_info(&shader_info);
memset(&target_info, 0, sizeof(target_info)); target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; @@ -1939,10 +2131,10 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; offset_info.next = NULL; - offset_info.descriptor_table_offset = 0; - offset_info.descriptor_table_count = 0; + offset_info.descriptor_table_offset = root_signature->descriptor_table_offset; + offset_info.descriptor_table_count = device->use_vk_heaps ? root_signature->descriptor_table_count : 0; offset_info.binding_offsets = root_signature->descriptor_offsets; - offset_info.uav_counter_offsets = NULL; + offset_info.uav_counter_offsets = root_signature->uav_counter_offsets; vkd3d_prepend_struct(&target_info, &offset_info); }
@@ -1954,8 +2146,16 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st shader_interface.push_constant_buffer_count = root_signature->root_constant_count; shader_interface.combined_samplers = NULL; shader_interface.combined_sampler_count = 0; - shader_interface.uav_counters = state->uav_counters.bindings; - shader_interface.uav_counter_count = state->uav_counters.binding_count; + if (root_signature->uav_counter_mapping) + { + shader_interface.uav_counters = root_signature->uav_counter_mapping; + shader_interface.uav_counter_count = root_signature->uav_mapping_count; + } + else + { + shader_interface.uav_counters = state->uav_counters.bindings; + shader_interface.uav_counter_count = state->uav_counters.binding_count; + }
vk_pipeline_layout = state->uav_counters.vk_pipeline_layout ? state->uav_counters.vk_pipeline_layout : root_signature->vk_pipeline_layout; @@ -2448,13 +2648,14 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s VkVertexInputBindingDivisorDescriptionEXT *binding_divisor; const struct vkd3d_vulkan_info *vk_info = &device->vk_info; uint32_t instance_divisors[D3D12_VS_INPUT_REGISTER_COUNT]; + struct vkd3d_shader_spirv_target_info *stage_target_info; uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT]; + struct vkd3d_shader_descriptor_offset_info offset_info; struct vkd3d_shader_parameter ps_shader_parameters[1]; struct vkd3d_shader_transform_feedback_info xfb_info; struct vkd3d_shader_spirv_target_info ps_target_info; struct vkd3d_shader_interface_info shader_interface; - struct vkd3d_shader_descriptor_offset_info offset_info; - struct vkd3d_shader_spirv_target_info *target_info; + struct vkd3d_shader_spirv_target_info target_info; const struct d3d12_root_signature *root_signature; struct vkd3d_shader_signature input_signature; bool have_attachment, is_dsv_format_unknown; @@ -2659,6 +2860,12 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s } }
+ memset(&target_info, 0, sizeof(target_info)); + target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; + target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + target_info.extensions = vk_info->shader_extensions; + target_info.extension_count = vk_info->shader_extension_count; + graphics->xfb_enabled = false; if (so_desc->NumEntries) { @@ -2707,10 +2914,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s { offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; offset_info.next = NULL; - offset_info.descriptor_table_offset = 0; - offset_info.descriptor_table_count = 0; + offset_info.descriptor_table_offset = root_signature->descriptor_table_offset; + offset_info.descriptor_table_count = root_signature->descriptor_table_count; offset_info.binding_offsets = root_signature->descriptor_offsets; - offset_info.uav_counter_offsets = NULL; + offset_info.uav_counter_offsets = root_signature->uav_counter_offsets; vkd3d_prepend_struct(&shader_interface, &offset_info); }
@@ -2726,24 +2933,27 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s if (!b->pShaderBytecode) continue;
- if ((ret = vkd3d_scan_dxbc(b, &shader_info)) < 0) + if (!device->use_vk_heaps) { - WARN("Failed to scan shader bytecode, stage %#x, vkd3d result %d.\n", - shader_stages[i].stage, ret); - hr = hresult_from_vkd3d_result(ret); - goto fail; - } - if (FAILED(hr = d3d12_pipeline_state_init_uav_counters(state, - device, root_signature, &shader_info, shader_stages[i].stage))) - { - WARN("Failed to create descriptor set layout for UAV counters, hr %#x.\n", hr); - goto fail; + if ((ret = vkd3d_scan_dxbc(b, &shader_info)) < 0) + { + WARN("Failed to scan shader bytecode, stage %#x, vkd3d result %d.\n", + shader_stages[i].stage, ret); + hr = hresult_from_vkd3d_result(ret); + goto fail; + } + if (FAILED(hr = d3d12_pipeline_state_init_uav_counters(state, + device, root_signature, &shader_info, shader_stages[i].stage))) + { + WARN("Failed to create descriptor set layout for UAV counters, hr %#x.\n", hr); + goto fail; + } + vkd3d_shader_free_scan_descriptor_info(&shader_info); } - vkd3d_shader_free_scan_descriptor_info(&shader_info);
shader_interface.uav_counters = NULL; shader_interface.uav_counter_count = 0; - target_info = NULL; + stage_target_info = &target_info; switch (shader_stages[i].stage) { case VK_SHADER_STAGE_VERTEX_BIT: @@ -2768,9 +2978,11 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s break;
case VK_SHADER_STAGE_FRAGMENT_BIT: - shader_interface.uav_counters = state->uav_counters.bindings; - shader_interface.uav_counter_count = state->uav_counters.binding_count; - target_info = &ps_target_info; + shader_interface.uav_counters = root_signature->uav_counter_mapping + ? root_signature->uav_counter_mapping : state->uav_counters.bindings; + shader_interface.uav_counter_count = root_signature->uav_counter_mapping + ? root_signature->uav_mapping_count : state->uav_counters.binding_count; + stage_target_info = &ps_target_info; break;
default: @@ -2779,10 +2991,15 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s }
shader_interface.next = NULL; + xfb_info.next = NULL; + ps_target_info.next = NULL; + target_info.next = NULL; + offset_info.next = NULL; if (shader_stages[i].stage == xfb_stage) vkd3d_prepend_struct(&shader_interface, &xfb_info); - if (target_info) - vkd3d_prepend_struct(&shader_interface, target_info); + vkd3d_prepend_struct(&shader_interface, stage_target_info); + if (root_signature->descriptor_offsets) + vkd3d_prepend_struct(&shader_interface, &offset_info);
if (FAILED(hr = create_shader_stage(device, &graphics->stages[graphics->stage_count], shader_stages[i].stage, b, &shader_interface))) diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 305a003b..0f80a84b 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -59,6 +59,9 @@ #define VKD3D_MAX_SHADER_STAGES 5u #define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_DESCRIPTOR_SETS 64u +#define VKD3D_MAX_BOUND_DESCRIPTOR_HEAPS 2u +/* D3D12 binding tier 3 has a limit of 2048 samplers. */ +#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u
struct d3d12_command_list; struct d3d12_device; @@ -92,6 +95,15 @@ HRESULT hresult_from_errno(int rc); HRESULT hresult_from_vk_result(VkResult vr); HRESULT hresult_from_vkd3d_result(int vkd3d_result);
+struct vkd3d_device_descriptor_limits +{ + unsigned int uniform_buffer_max_descriptors; + unsigned int sampled_image_max_descriptors; + unsigned int storage_buffer_max_descriptors; + unsigned int storage_image_max_descriptors; + unsigned int sampler_max_descriptors; +}; + struct vkd3d_vulkan_info { /* KHR instance extensions */ @@ -125,6 +137,7 @@ struct vkd3d_vulkan_info
VkPhysicalDeviceLimits device_limits; VkPhysicalDeviceSparseProperties sparse_properties; + struct vkd3d_device_descriptor_limits descriptor_limits;
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
@@ -138,6 +151,7 @@ enum vkd3d_config_flags { VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, VKD3D_CONFIG_FLAG_ATOMIC_DESC_OPS = 0x00000002, + VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS = 0x00000004, };
struct vkd3d_instance @@ -620,11 +634,53 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc);
+enum vkd3d_vk_descriptor_set_index +{ + VKD3D_SET_INDEX_UNIFORM_BUFFER = 0, + VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER = 1, + VKD3D_SET_INDEX_SAMPLED_IMAGE = 2, + VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER = 3, + VKD3D_SET_INDEX_STORAGE_IMAGE = 4, + VKD3D_SET_INDEX_SAMPLER = 5, + VKD3D_SET_INDEX_UAV_COUNTER = 6, + VKD3D_SET_INDEX_COUNT = 7 +}; + +extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; + +static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + VkDescriptorType type) +{ + assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); + assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); + + return vk_descriptor_set_index_table[type]; +} + +struct vkd3d_vk_descriptor_heap_layout +{ + VkDescriptorType type; + bool buffer_dimension; + D3D12_DESCRIPTOR_HEAP_TYPE applicable_heap_type; + unsigned int count; + VkDescriptorSetLayout vk_set_layout; +}; + +struct d3d12_descriptor_heap_vk_set +{ + VkDescriptorSet vk_set; + pthread_mutex_t mutex; + VkWriteDescriptorSet vk_descriptor_write; + VkDescriptorImageInfo vk_image_info; + void (*write_descriptor)(struct d3d12_descriptor_heap_vk_set *descriptor_set, const struct d3d12_desc *src); +}; + /* ID3D12DescriptorHeap */ struct d3d12_descriptor_heap { ID3D12DescriptorHeap ID3D12DescriptorHeap_iface; LONG refcount; + unsigned int serial_id;
D3D12_DESCRIPTOR_HEAP_DESC desc;
@@ -632,11 +688,15 @@ struct d3d12_descriptor_heap
struct vkd3d_private_store private_store;
+ VkDescriptorPool vk_descriptor_pool; + struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT]; + BYTE descriptors[]; };
HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap); +struct d3d12_descriptor_heap *unsafe_impl_from_ID3D12DescriptorHeap(ID3D12DescriptorHeap *iface);
static inline struct d3d12_descriptor_heap *vkd3d_gpu_descriptor_allocator_heap_from_descriptor( struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc) @@ -755,8 +815,13 @@ struct d3d12_root_signature D3D12_ROOT_SIGNATURE_FLAGS flags;
unsigned int binding_count; + unsigned int uav_mapping_count; struct vkd3d_shader_resource_binding *descriptor_mapping; + struct vkd3d_shader_uav_counter_binding *uav_counter_mapping; struct vkd3d_shader_descriptor_offset *descriptor_offsets; + struct vkd3d_shader_descriptor_offset *uav_counter_offsets; + unsigned int descriptor_table_offset; + unsigned int descriptor_table_count;
unsigned int root_constant_count; struct vkd3d_shader_push_constant_buffer *root_constants; @@ -975,6 +1040,9 @@ struct vkd3d_pipeline_bindings struct d3d12_desc *descriptor_tables[D3D12_MAX_ROOT_COST]; uint64_t descriptor_table_dirty_mask; uint64_t descriptor_table_active_mask; + unsigned int bound_descriptor_set_flags; + bool cbv_srv_uav_heap_dirty; + bool sampler_heap_dirty;
VkBufferView *vk_uav_counter_views; size_t vk_uav_counter_views_size; @@ -1036,6 +1104,11 @@ struct d3d12_command_list VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];
+ struct d3d12_descriptor_heap *cbv_srv_uav_heap; + struct d3d12_descriptor_heap *sampler_heap; + unsigned int cbv_srv_uav_heap_id; + unsigned int sampler_heap_id; + struct vkd3d_private_store private_store; };
@@ -1228,6 +1301,11 @@ struct d3d12_device void (*descriptor_vk_heap_write)(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); bool atomic_desc_ops; + + bool use_vk_heaps; + struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; + unsigned int cbv_srv_uav_vk_set_flags; + unsigned int sampler_vk_set_flags; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance, diff --git a/tests/d3d12.c b/tests/d3d12.c index 3b0c4242..10e6d9a4 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -16743,13 +16743,15 @@ static void test_update_descriptor_tables(void) destroy_test_context(&context); }
-/* This cannot be implemented reasonably in Vulkan. Vulkan doesn't allow - * updating descriptor sets after the vkCmdBindDescriptorSets() command - * is recorded. +/* This requires the Vulkan descriptor indexing extension and Vulkan-backed + * descriptor heaps. Vulkan doesn't allow updating descriptor sets after the + * vkCmdBindDescriptorSets() command is recorded unless the update-after-bind + * feature of descriptor indexing is used. */ static void test_update_descriptor_heap_after_closing_command_list(void) { ID3D12Resource *red_texture, *green_texture; + D3D12_RESOURCE_BINDING_TIER binding_tier; ID3D12GraphicsCommandList *command_list; D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; ID3D12DescriptorHeap *cpu_heap, *heap; @@ -16799,6 +16801,8 @@ static void test_update_descriptor_heap_after_closing_command_list(void) command_list = context.list; queue = context.queue;
+ binding_tier = get_resource_binding_tier(context.device); + context.root_signature = create_texture_root_signature(context.device, D3D12_SHADER_VISIBILITY_PIXEL, 0, 0); context.pipeline_state = create_pipeline_state(context.device, @@ -16860,7 +16864,8 @@ static void test_update_descriptor_heap_after_closing_command_list(void) D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); get_texture_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); value = get_readback_uint(&rb, 0, 0, 0); - todo ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value); + todo_if(binding_tier < D3D12_RESOURCE_BINDING_TIER_3) + ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value); release_resource_readback(&rb);
ID3D12DescriptorHeap_Release(cpu_heap);