From: Conor McCarthy cmccarthy@codeweavers.com
Unpolished. Raises the framerate by around 15% in Shadow of the Tomb Raider (SotTR) because vk_sets_mutex is locked only once per descriptor heap when the pending updates are flushed. It would be possible to collate multiple writes into a single vkUpdateDescriptorSets() call, as was done in the optimised copying path, but it may not be worth it.
A possible complication: if it becomes possible to remove the descriptor mutexes once the remaining problems with queue sequencing are fixed, we may, in theory, still need them for this change. Descriptors should not be updated while they are being bound, but there is no reason more descriptors can't be written from another thread while a command list is being submitted to a queue. --- include/private/vkd3d_common.h | 8 ++ libs/vkd3d/command.c | 39 +++++++ libs/vkd3d/device.c | 146 ------------------------- libs/vkd3d/resource.c | 188 +++++++++++---------------------- libs/vkd3d/vkd3d_private.h | 24 ++--- 5 files changed, 117 insertions(+), 288 deletions(-)
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 78a15c19..a283af45 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -237,6 +237,14 @@ static inline LONG InterlockedAdd(LONG volatile *x, LONG val) { return __sync_add_and_fetch(x, val); } +static inline LONG InterlockedOr(LONG volatile *x, LONG val) +{ + return __sync_or_and_fetch(x, val); +} +static inline LONG InterlockedExchange(LONG volatile *x, LONG val) +{ + return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +} # else # error "InterlockedIncrement() not implemented for this platform" # endif /* HAVE_SYNC_ADD_AND_FETCH */ diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index e2db84bd..08a75170 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -2742,6 +2742,41 @@ static bool d3d12_command_list_update_current_framebuffer(struct d3d12_command_l return true; }
+static bool contains_heap(const struct d3d12_descriptor_heap **heap_array, unsigned int count, + const struct d3d12_descriptor_heap *query) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + if (heap_array[i] == query) + return true; + return false; +} + +static void pipeline_bindings_flush_vk_heap_updates(struct vkd3d_pipeline_bindings *bindings, + struct d3d12_device *device) +{ + const struct d3d12_descriptor_heap *heap_array[3]; + struct d3d12_descriptor_heap *descriptor_heap; + unsigned int i, count; + uint64_t mask; + + mask = bindings->descriptor_table_active_mask & bindings->root_signature->descriptor_table_mask; + + for (i = 0, count = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i) + { + if (!(mask & (1ull << i)) || !bindings->descriptor_tables[i]) + continue; + + descriptor_heap = d3d12_desc_get_descriptor_heap(bindings->descriptor_tables[i]); + if (contains_heap(heap_array, count, descriptor_heap)) + continue; + assert(count < ARRAY_SIZE(heap_array)); + heap_array[count++] = descriptor_heap; + d3d12_desc_flush_vk_heap_updates(descriptor_heap, device); + } +} + static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; @@ -6324,6 +6359,10 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm return; }
+ if (cmd_list->state) + pipeline_bindings_flush_vk_heap_updates(&cmd_list->pipeline_bindings[cmd_list->state->vk_bind_point], + cmd_list->device); + buffers[i] = cmd_list->vk_command_buffer; }
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 071972d5..12c71bfc 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -3399,132 +3399,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], - struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) -{ - enum vkd3d_vk_descriptor_set_index set; - for (set = 0; set < VKD3D_SET_INDEX_COUNT; ++set) - { - if (!infos[set].count) - continue; - d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); - infos[set].count = 0; - infos[set].uav_counter = false; - } -} - -static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], - struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) -{ - struct d3d12_desc_copy_location *location; - enum vkd3d_vk_descriptor_set_index set; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) - { - /* Source must be unlocked first, and therefore can't be used as a null source. 
*/ - static const struct d3d12_desc null = {0}; - vkd3d_mutex_unlock(mutex); - d3d12_desc_write_atomic(dst, &null, device); - return; - } - - set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->s.vk_descriptor_type); - location = &locations[set][infos[set].count++]; - - location->src.s = src->s; - - if (location->src.s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(location->src.s.u.view_info.view); - - vkd3d_mutex_unlock(mutex); - - infos[set].uav_counter |= (location->src.s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) - && !!location->src.s.u.view_info.view->vk_counter_view; - location->dst = dst; - - if (infos[set].count == ARRAY_SIZE(locations[0])) - { - d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); - infos[set].count = 0; - infos[set].uav_counter = false; - } -} - -/* Some games, e.g. Control, copy a large number of descriptors per frame, so the - * speed of this function is critical. */ -static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes) -{ - struct d3d12_desc_copy_location locations[VKD3D_SET_INDEX_COUNT][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - /* The locations array is relatively large, and often mostly empty. Keeping these - * values together in a separate array will likely result in fewer cache misses. 
*/ - struct d3d12_desc_copy_info infos[VKD3D_SET_INDEX_COUNT]; - struct d3d12_descriptor_heap *descriptor_heap = NULL; - const struct d3d12_desc *src, *heap_base, *heap_end; - unsigned int dst_range_size, src_range_size; - struct d3d12_desc *dst; - - descriptor_heap = d3d12_desc_get_descriptor_heap(d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[0])); - heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; - heap_end = heap_base + descriptor_heap->desc.NumDescriptors; - - memset(infos, 0, sizeof(infos)); - dst_range_idx = dst_idx = 0; - src_range_idx = src_idx = 0; - while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) - { - dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1; - src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; - - dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); - src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - - if (dst < heap_base || dst >= heap_end) - { - flush_desc_writes(locations, infos, descriptor_heap, device); - descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; - heap_end = heap_base + descriptor_heap->desc.NumDescriptors; - } - - for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++) - { - /* We don't need to lock either descriptor for the identity check. The descriptor - * mutex is only intended to prevent use-after-free of the vkd3d_view caused by a - * race condition in the calling app. It is unnecessary to protect this test as it's - * the app's race condition, not ours. 
*/ - if (dst[dst_idx].s.magic == src[src_idx].s.magic && (dst[dst_idx].s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && dst[dst_idx].s.u.view_info.written_serial_id == src[src_idx].s.u.view_info.view->serial_id) - continue; - d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); - } - - if (dst_idx >= dst_range_size) - { - ++dst_range_idx; - dst_idx = 0; - } - if (src_idx >= src_range_size) - { - ++src_range_idx; - src_idx = 0; - } - } - - flush_desc_writes(locations, infos, descriptor_heap, device); -} - -#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8 - static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, @@ -3556,15 +3430,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, if (!dst_descriptor_range_count) return;
- if (device->use_vk_heaps && (dst_descriptor_range_count > 1 || (dst_descriptor_range_sizes - && dst_descriptor_range_sizes[0] >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT))) - { - d3d12_device_vk_heaps_copy_descriptors(device, dst_descriptor_range_count, dst_descriptor_range_offsets, - dst_descriptor_range_sizes, src_descriptor_range_count, src_descriptor_range_offsets, - src_descriptor_range_sizes); - return; - } - dst_range_idx = dst_idx = 0; src_range_idx = src_idx = 0; while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) @@ -3601,17 +3466,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, descriptor_heap_type);
- if (descriptor_count >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT) - { - struct d3d12_device *device = impl_from_ID3D12Device(iface); - if (device->use_vk_heaps) - { - d3d12_device_vk_heaps_copy_descriptors(device, 1, &dst_descriptor_range_offset, - &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count); - return; - } - } - d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); } diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 60f2bb75..2ac8ed62 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2131,54 +2131,6 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); }
-/* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). */ -static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descriptor_heap_vk_set *descriptor_set, - struct d3d12_desc_copy_location *locations, unsigned int write_count) -{ - unsigned int i, info_index = 0, write_index = 0; - - switch (locations[0].src.s.vk_descriptor_type) - { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.s.u.vk_cbv_info; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.s.u.view_info.view->u.vk_image_view; - } - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.s.u.view_info.view->u.vk_buffer_view; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; - for (i = 0; i 
< descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.s.u.view_info.view->u.vk_sampler; - } - break; - default: - ERR("Unhandled descriptor type %#x.\n", locations[0].src.s.vk_descriptor_type); - break; - } -} - static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_heap *descriptor_heap, uint32_t dst_array_element, const struct d3d12_device *device) { @@ -2222,22 +2174,18 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea
/* dst and src contain the same data unless another thread overwrites dst. The array index is * calculated from dst, and src is thread safe. */ -static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) +static void d3d12_desc_write_vk_heap_locked(struct d3d12_descriptor_heap *descriptor_heap, + const struct d3d12_desc *src, struct d3d12_device *device) { struct d3d12_descriptor_heap_vk_set *descriptor_set; - struct d3d12_descriptor_heap *descriptor_heap; const struct vkd3d_vk_device_procs *vk_procs; bool is_null = false;
- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( src->s.vk_descriptor_type)]; vk_procs = &device->vk_procs;
- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; + descriptor_set->vk_descriptor_writes[0].dstArrayElement = src->index; descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; switch (src->s.vk_descriptor_type) { @@ -2265,7 +2213,6 @@ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct { d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, descriptor_set->vk_descriptor_writes[0].dstArrayElement, device); - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); return; }
@@ -2274,36 +2221,75 @@ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->s.u.view_info.view->vk_counter_view) { descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; + descriptor_set->vk_descriptor_writes[0].dstArrayElement = src->index; descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->vk_counter_view; VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); } - - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); }
-static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) +void d3d12_desc_flush_vk_heap_updates(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) { - struct vkd3d_view *defunct_view; + const struct d3d12_desc *descriptors, *dst; struct vkd3d_mutex *mutex; + unsigned int i, j, count; + struct d3d12_desc src; + volatile LONG *flags; + unsigned int f;
- mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); + descriptors = (const struct d3d12_desc *)descriptor_heap->descriptors; + flags = d3d12_descriptor_heap_get_dirty_flags(descriptor_heap); + count = (descriptor_heap->desc.NumDescriptors + 31) / 32u; + + vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex);
- if (!(dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->s.u.view_info.view->refcount)) + for (i = 0; i < count; ++i, descriptors += 32) { - d3d12_desc_copy_raw(dst, src); - vkd3d_mutex_unlock(mutex); - return; + if (!flags[i]) + continue; + + f = InterlockedExchange(&flags[i], 0); + + for (j = 0; f; ++j) + { + bool b = f & 1; + f >>= 1; + if (!b) + continue; + + dst = &descriptors[j]; + + mutex = d3d12_device_get_descriptor_mutex(device, dst); + vkd3d_mutex_lock(mutex); + + src = *dst; + if (src.s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_incref(src.s.u.view_info.view); + + vkd3d_mutex_unlock(mutex); + + if (!src.s.magic) + continue; + + d3d12_desc_write_vk_heap_locked(descriptor_heap, &src, device); + + if (src.s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_decref(src.s.u.view_info.view, device); + } }
- defunct_view = dst->s.u.view_info.view; - d3d12_desc_copy_raw(dst, src); - vkd3d_mutex_unlock(mutex); + vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); +}
- /* Destroy the view after unlocking to reduce wait time. */ - vkd3d_view_destroy(defunct_view, device); +static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + struct d3d12_descriptor_heap *descriptor_heap; + unsigned int i; + + descriptor_heap = d3d12_desc_get_descriptor_heap(dst); + i = dst->index / 32u; + InterlockedOr(&d3d12_descriptor_heap_get_dirty_flags(descriptor_heap)[i], 1 << (dst->index & 31)); }
void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, @@ -2339,58 +2325,6 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic d3d12_desc_write_atomic(descriptor, &null_desc, device); }
-void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, - struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, - struct d3d12_device *device) -{ - struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i, write_count; - - vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - - for (i = 0, write_count = 0; i < info->count; ++i) - { - d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); - - if (i && locations[i].dst == locations[i - 1].dst + 1) - { - ++descriptor_set->vk_descriptor_writes[write_count - 1].descriptorCount; - continue; - } - /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. */ - descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - d3d12_descriptor_heap_write_vk_descriptor_range(descriptor_set, locations, write_count); - /* We could pass a VkCopyDescriptorSet array instead, but that would require also storing a src array index - * for each location, which means querying the src descriptor heap. Contiguous copies require contiguous src - * descriptors as well as dst, which is less likely to occur. And client race conditions may break it. 
*/ - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); - - if (!info->uav_counter) - goto done; - - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - - for (i = 0, write_count = 0; i < info->count; ++i) - { - if (!locations[i].src.s.u.view_info.view->vk_counter_view) - continue; - descriptor_set->vk_buffer_views[write_count] = locations[i].src.s.u.view_info.view->vk_counter_view; - descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; - /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. */ - descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); - -done: - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); -} - void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -4022,8 +3956,8 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, { size_t max_descriptor_count, descriptor_size; struct d3d12_descriptor_heap *object; + unsigned int i, dirty_flags_size; struct d3d12_desc *dst; - unsigned int i; HRESULT hr;
if (!(descriptor_size = d3d12_device_get_descriptor_handle_increment_size(device, desc->Type))) @@ -4046,8 +3980,9 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, return E_OUTOFMEMORY; }
+ dirty_flags_size = (desc->NumDescriptors + 31) / 32u * sizeof(LONG); if (!(object = vkd3d_malloc(offsetof(struct d3d12_descriptor_heap, - descriptors[descriptor_size * desc->NumDescriptors])))) + descriptors[descriptor_size * desc->NumDescriptors]) + dirty_flags_size))) return E_OUTOFMEMORY;
if (FAILED(hr = d3d12_descriptor_heap_init(object, device, desc))) @@ -4064,6 +3999,7 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, memset(&dst[i].s, 0, sizeof(dst[i].s)); dst[i].index = i; } + memset(d3d12_descriptor_heap_get_dirty_flags(object), 0, dirty_flags_size); } else { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 3cabb2da..c97b7cfa 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -842,6 +842,14 @@ struct d3d12_descriptor_heap BYTE descriptors[]; };
+static inline LONG *d3d12_descriptor_heap_get_dirty_flags(struct d3d12_descriptor_heap *descriptor_heap) +{ + struct d3d12_desc *descriptors = (struct d3d12_desc *)descriptor_heap->descriptors; + return (LONG *)(descriptors + descriptor_heap->desc.NumDescriptors); +} + +void d3d12_desc_flush_vk_heap_updates(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device); + static inline struct d3d12_descriptor_heap *d3d12_desc_get_descriptor_heap(const struct d3d12_desc *descriptor) { return CONTAINING_RECORD(descriptor - descriptor->index, struct d3d12_descriptor_heap, descriptors); @@ -856,22 +864,6 @@ static inline unsigned int d3d12_desc_heap_range_size(const struct d3d12_desc *d HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap);
-struct d3d12_desc_copy_location -{ - struct d3d12_desc src; - struct d3d12_desc *dst; -}; - -struct d3d12_desc_copy_info -{ - unsigned int count; - bool uav_counter; -}; - -void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, - struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, - struct d3d12_device *device); - /* ID3D12QueryHeap */ struct d3d12_query_heap {