From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d/command.c | 6 +-- libs/vkd3d/device.c | 40 ++++++++++++-- libs/vkd3d/resource.c | 107 +++++++++++++++++++++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 8 ++- tests/d3d12.c | 4 +- 5 files changed, 156 insertions(+), 9 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 7115a74a6..d7276ff2a 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -31,7 +31,7 @@ static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any);
HRESULT vkd3d_queue_create(struct d3d12_device *device, - uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) + uint32_t family_index, VkQueueFlags vk_queue_flags, uint32_t timestamp_bits, struct vkd3d_queue **queue) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_queue *object; @@ -45,8 +45,8 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, object->submitted_sequence_number = 0;
object->vk_family_index = family_index; - object->vk_queue_flags = properties->queueFlags; - object->timestamp_bits = properties->timestampValidBits; + object->vk_queue_flags = vk_queue_flags; + object->timestamp_bits = timestamp_bits;
object->semaphores = NULL; object->semaphores_size = 0; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 90de27c53..cac57f7cc 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1829,6 +1829,7 @@ enum vkd3d_queue_family VKD3D_QUEUE_FAMILY_DIRECT, VKD3D_QUEUE_FAMILY_COMPUTE, VKD3D_QUEUE_FAMILY_TRANSFER, + VKD3D_QUEUE_FAMILY_TILED_BINDING,
VKD3D_QUEUE_FAMILY_COUNT, }; @@ -1851,10 +1852,13 @@ static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) if (device->copy_queue && device->copy_queue != device->direct_queue && device->copy_queue != device->compute_queue) vkd3d_queue_destroy(device->copy_queue, device); + if (device->tiled_binding_queue && device->tiled_binding_queue != device->direct_queue) + vkd3d_queue_destroy(device->tiled_binding_queue, device);
device->direct_queue = NULL; device->compute_queue = NULL; device->copy_queue = NULL; + device->tiled_binding_queue = NULL; }
static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, @@ -1868,12 +1872,15 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, device->direct_queue = NULL; device->compute_queue = NULL; device->copy_queue = NULL; + device->tiled_binding_queue = NULL; + device->tiled_binding_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_TILED_BINDING];
device->queue_family_count = 0; memset(device->queue_family_indices, 0, sizeof(device->queue_family_indices));
if (SUCCEEDED((hr = vkd3d_queue_create(device, direct_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT], &device->direct_queue)))) + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT].queueFlags, + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT].timestampValidBits, &device->direct_queue)))) device->queue_family_indices[device->queue_family_count++] = direct_family_index; else goto out_destroy_queues; @@ -1881,7 +1888,8 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, if (compute_family_index == direct_family_index) device->compute_queue = device->direct_queue; else if (SUCCEEDED(hr = vkd3d_queue_create(device, compute_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE], &device->compute_queue))) + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE].queueFlags, + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE].timestampValidBits, &device->compute_queue))) device->queue_family_indices[device->queue_family_count++] = compute_family_index; else goto out_destroy_queues; @@ -1891,11 +1899,15 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, else if (transfer_family_index == compute_family_index) device->copy_queue = device->compute_queue; else if (SUCCEEDED(hr = vkd3d_queue_create(device, transfer_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER], &device->copy_queue))) + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER].queueFlags, + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER].timestampValidBits, &device->copy_queue))) device->queue_family_indices[device->queue_family_count++] = transfer_family_index; else goto out_destroy_queues;
+ if (device->tiled_binding_family_index == direct_family_index) + device->tiled_binding_queue = device->direct_queue; + device->feature_options3.CopyQueueTimestampQueriesSupported = !!device->copy_queue->timestamp_bits;
return S_OK; @@ -1905,6 +1917,17 @@ out_destroy_queues: return hr; }
+struct vkd3d_queue *d3d12_device_get_tiled_binding_queue(struct d3d12_device *device) +{ + HRESULT hr; + + if (!device->tiled_binding_queue + && (SUCCEEDED(hr = vkd3d_queue_create(device, device->tiled_binding_family_index, + VK_QUEUE_SPARSE_BINDING_BIT, 0, &device->tiled_binding_queue)))) + device->queue_family_indices[device->queue_family_count++] = device->tiled_binding_family_index; + return device->tiled_binding_queue; +} + static float queue_priorities[] = {1.0f};
static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, @@ -1944,6 +1967,12 @@ static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, vkd3d_family = VKD3D_QUEUE_FAMILY_TRANSFER; }
+ if (info->family_index[VKD3D_QUEUE_FAMILY_TILED_BINDING] == UINT_MAX + && (queue_properties[i].queueFlags & VK_QUEUE_SPARSE_BINDING_BIT)) + { + info->family_index[VKD3D_QUEUE_FAMILY_TILED_BINDING] = i; + } + if (vkd3d_family == VKD3D_QUEUE_FAMILY_COUNT) continue;
@@ -1978,6 +2007,11 @@ static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, info->family_index[VKD3D_QUEUE_FAMILY_TRANSFER] = info->family_index[VKD3D_QUEUE_FAMILY_DIRECT]; info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER] = info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT]; } + if (info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT].queueFlags & VK_QUEUE_SPARSE_BINDING_BIT) + { + info->family_index[VKD3D_QUEUE_FAMILY_TILED_BINDING] = info->family_index[VKD3D_QUEUE_FAMILY_DIRECT]; + info->vk_properties[VKD3D_QUEUE_FAMILY_TILED_BINDING] = info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT]; + }
/* Compact the array. */ info->vk_family_count = 1; diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 179999148..3940e46bc 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -671,6 +671,7 @@ HRESULT vkd3d_create_buffer(struct d3d12_device *device, buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT; if (device->vk_info.sparse_properties.residencyNonResidentStrict) buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; + d3d12_device_get_tiled_binding_queue(device); }
buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT @@ -861,6 +862,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, }
image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + d3d12_device_get_tiled_binding_queue(device); } else if (desc->Layout == D3D12_TEXTURE_LAYOUT_UNKNOWN) { @@ -977,6 +979,15 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device,
static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) { + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + + if (!resource->tiles.subresources) + return; + + VK_CALL(vkFreeMemory(resource->device->vk_device, resource->tiles.mip_tail_memory, NULL)); + + vkd3d_free(resource->tiles.bind_buffer); + vkd3d_free(resource->tiles.subresources); }
@@ -1141,6 +1152,66 @@ void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_r *subresource_tiling_count = i; }
+static void d3d12_resource_bind_sparse_mip_tail(struct d3d12_resource *resource, + VkSparseImageMemoryRequirements *sparse_requirements) +{ + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + VkSparseMemoryBind *memory_bind = resource->tiles.bind_buffer; + VkSparseImageOpaqueMemoryBindInfo opaque_bind_info; + struct d3d12_device *device = resource->device; + struct vkd3d_queue *vkd3d_queue; + VkBindSparseInfo sparse_info; + unsigned int i, layer_count; + VkDeviceSize memory_offset; + VkQueue vk_queue; + VkResult vr; + + if (!resource->tiles.packed_mip_tile_count) + return; + + if (!(vkd3d_queue = d3d12_device_get_tiled_binding_queue(device))) + { + ERR("Failed to get sparse binding queue.\n"); + return; + } + + opaque_bind_info.image = resource->u.vk_image; + opaque_bind_info.bindCount = 1; + opaque_bind_info.pBinds = memory_bind; + + layer_count = resource->tiles.single_mip_tail ? 1 : d3d12_resource_desc_get_layer_count(&resource->desc); + + for (i = 0, memory_offset = 0; i < layer_count; ++i) + { + memory_bind->resourceOffset = sparse_requirements->imageMipTailOffset + + i * sparse_requirements->imageMipTailStride; + memory_bind->size = sparse_requirements->imageMipTailSize; + memory_bind->memory = resource->tiles.mip_tail_memory; + memory_bind->memoryOffset = memory_offset; + memory_bind->flags = 0; + memory_offset += memory_bind->size; + } + + memset(&sparse_info, 0, sizeof(sparse_info)); + sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; + sparse_info.imageOpaqueBindCount = 1; + sparse_info.pImageOpaqueBinds = &opaque_bind_info; + + if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) + { + ERR("Failed to acquire queue %p.\n", vkd3d_queue); + return; + } + + if ((vr = VK_CALL(vkQueueBindSparse(vk_queue, 1, &sparse_info, VK_NULL_HANDLE))) < 0) + ERR("Failed to submit sparse image bind, vr %d.\n", vr); + /* The caller may use the resource in another queue. Avoid sync complications by waiting for idle. */ + if ((vr = VK_CALL(vkQueueWaitIdle(vk_queue)) < 0)) + WARN("Failed to wait for queue, vr %d.\n", vr); + + vkd3d_queue_release(vkd3d_queue); +} + static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device) { unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx; @@ -1148,9 +1219,11 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 VkSparseImageMemoryRequirements *sparse_requirements_array; VkSparseImageMemoryRequirements sparse_requirements = {0}; struct vkd3d_subresource_tile_info *tile_info; + D3D12_HEAP_PROPERTIES heap_properties; VkMemoryRequirements requirements; const VkExtent3D *tile_extent; uint32_t requirement_count; + HRESULT hr;
subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc);
@@ -1214,6 +1287,8 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 sparse_requirements = sparse_requirements_array[i]; } } + if (sparse_requirements_array[i].formatProperties.aspectMask & VK_IMAGE_ASPECT_METADATA_BIT) + FIXME("Mip tail metadata binding is not implemented.\n"); } vkd3d_free(sparse_requirements_array); if (!sparse_requirements.formatProperties.aspectMask) @@ -1223,6 +1298,8 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 }
resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; + resource->tiles.single_mip_tail = !!(sparse_requirements.formatProperties.flags + & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT); resource->tiles.subresource_count = subresource_count; resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; @@ -1252,9 +1329,39 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 } } resource->tiles.total_count = start_idx; + + if (resource->tiles.packed_mip_tile_count) + { + memset(&heap_properties, 0, sizeof(heap_properties)); + heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; + requirements.size = sparse_requirements.imageMipTailSize; + if (!resource->tiles.single_mip_tail) + requirements.size *= d3d12_resource_desc_get_layer_count(&resource->desc); + if (FAILED(hr = vkd3d_allocate_device_memory(device, &heap_properties, 0, &requirements, NULL, + &resource->tiles.mip_tail_memory, NULL))) + { + ERR("Failed to allocate device memory for mip tail, hr %s.\n", debugstr_hresult(hr)); + goto error; + } + } + + if (!(resource->tiles.bind_buffer = vkd3d_malloc(start_idx * max(sizeof(VkSparseImageMemoryBind), + sizeof(VkBufferImageCopy))))) + { + ERR("Failed to allocate binding buffer.\n"); + goto error; + } + + /* Vulkan implementations may merge layer miptails into a single miptail, which is not supported in D3D12. + * TODO: do this only if single miptails are used, otherwise handle miptails in UpdateTileMappings(). */ + d3d12_resource_bind_sparse_mip_tail(resource, &sparse_requirements); }
return true; + +error: + d3d12_resource_tile_info_cleanup(resource); + return false; }
/* ID3D12Resource */ diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 8de57a336..140aa3575 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -620,7 +620,10 @@ struct d3d12_resource_tile_info unsigned int standard_mip_count; unsigned int packed_mip_tile_count; unsigned int subresource_count; + bool single_mip_tail; struct vkd3d_subresource_tile_info *subresources; + VkDeviceMemory mip_tail_memory; + void *bind_buffer; };
/* ID3D12Resource */ @@ -1465,7 +1468,7 @@ struct vkd3d_queue
VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, - const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); + VkQueueFlags vk_queue_flags, uint32_t timestamp_bits, struct vkd3d_queue **queue); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue);
@@ -1715,9 +1718,11 @@ struct d3d12_device struct vkd3d_queue *direct_queue; struct vkd3d_queue *compute_queue; struct vkd3d_queue *copy_queue; + struct vkd3d_queue *tiled_binding_queue; uint32_t queue_family_indices[VKD3D_MAX_QUEUE_FAMILY_COUNT]; unsigned int queue_family_count; VkTimeDomainEXT vk_host_time_domain; + unsigned int tiled_binding_family_index;
struct vkd3d_mutex blocked_queues_mutex; struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES]; @@ -1742,6 +1747,7 @@ struct d3d12_device HRESULT d3d12_device_create(struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info, struct d3d12_device **device); struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type); +struct vkd3d_queue *d3d12_device_get_tiled_binding_queue(struct d3d12_device *device); bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); diff --git a/tests/d3d12.c b/tests/d3d12.c index fc2e176c3..48785e6d7 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -39113,7 +39113,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < j; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo check_readback_data_uint(&rb.rb, &box, i + 1, 0); + todo_if(i < packed_mip_info.StartTileIndexInOverallResource) check_readback_data_uint(&rb.rb, &box, i + 1, 0); }
release_resource_readback(&rb); @@ -39209,7 +39209,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < j; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo_if(texture_region_tiles[i]) + todo_if(i < packed_mip_info.StartTileIndexInOverallResource && texture_region_tiles[i]) check_readback_data_uint(&rb.rb, &box, texture_region_tiles[i], 0); }