From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d/command.c | 365 ++++++++++++++++++++++++++++++++++++- libs/vkd3d/resource.c | 11 +- libs/vkd3d/vkd3d_private.h | 4 + tests/d3d12.c | 13 +- 4 files changed, 385 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 39c0f4cd3..69451b601 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -34,7 +34,9 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSemaphoreCreateInfo semaphore_info; struct vkd3d_queue *object; + VkResult vr;
if (!(object = vkd3d_malloc(sizeof(*object)))) return E_OUTOFMEMORY; @@ -54,6 +56,20 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device,
memset(object->old_vk_semaphores, 0, sizeof(object->old_vk_semaphores));
+ object->tiled_binding_semaphore = VK_NULL_HANDLE; + if (object->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT) + { + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_info.pNext = NULL; + semaphore_info.flags = 0; + if ((vr = VK_CALL(vkCreateSemaphore(device->vk_device, &semaphore_info, NULL, + &object->tiled_binding_semaphore))) < 0) + { + ERR("Failed to create tiled binding semaphore, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + } + VK_CALL(vkGetDeviceQueue(device->vk_device, family_index, 0, &object->vk_queue));
TRACE("Created queue %p for queue family index %u.\n", object, family_index); @@ -81,6 +97,8 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) VK_CALL(vkDestroySemaphore(device->vk_device, queue->old_vk_semaphores[i], NULL)); }
+ VK_CALL(vkDestroySemaphore(device->vk_device, queue->tiled_binding_semaphore, NULL)); + vkd3d_mutex_unlock(&queue->mutex);
vkd3d_mutex_destroy(&queue->mutex); @@ -104,6 +122,27 @@ void vkd3d_queue_release(struct vkd3d_queue *queue) vkd3d_mutex_unlock(&queue->mutex); }
+VkResult vkd3d_queue_submit_wait_acquired(const struct vkd3d_queue *queue, VkSemaphore vk_semaphore, + struct d3d12_device *device) +{ + VkPipelineStageFlags stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSubmitInfo submit_info; + + memset(&submit_info, 0, sizeof(submit_info)); + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = NULL; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &vk_semaphore; + submit_info.pWaitDstStageMask = &stage_mask; + submit_info.commandBufferCount = 0; + submit_info.pCommandBuffers = NULL; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = NULL; + + return VK_CALL(vkQueueSubmit(queue->vk_queue, 1, &submit_info, VK_NULL_HANDLE)); +} + static VkResult vkd3d_queue_wait_idle(struct vkd3d_queue *queue, const struct vkd3d_vk_device_procs *vk_procs) { @@ -3887,6 +3926,104 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm } }
+struct vkd3d_resource_tile_coordinate +{ + unsigned int x; + unsigned int y; + unsigned int z; +}; + +static inline unsigned int d3d12_tile_region_size_compute_tile_count(const D3D12_TILE_REGION_SIZE *region_size) +{ + return region_size->Width * region_size->Height * region_size->Depth; +} + +static inline void d3d12_tile_region_size_set_entire_subresource(D3D12_TILE_REGION_SIZE *region_size, + const struct d3d12_resource *resource, unsigned int subresource) +{ + const struct vkd3d_tiled_region_extent *extent = &resource->tiles.subresources[subresource].extent; + region_size->Width = extent->width; + region_size->Height = extent->height; + region_size->Depth = extent->depth; +} + +static bool resource_validate_tiled_coordinate(const struct d3d12_resource *resource, + const D3D12_TILED_RESOURCE_COORDINATE *coordinate) +{ + const struct vkd3d_tiled_region_extent *extent = &resource->tiles.subresources[coordinate->Subresource].extent; + + return coordinate->Subresource < resource->tiles.subresource_count + && coordinate->X < extent->width && coordinate->Y < extent->height && coordinate->Z < extent->depth; +} + +/* coordinate must already be validated */ +static bool resource_validate_tile_region_size(const struct d3d12_resource *resource, + const D3D12_TILED_RESOURCE_COORDINATE *coordinate, const D3D12_TILE_REGION_SIZE *size) +{ + const struct vkd3d_tiled_region_extent *extent = &resource->tiles.subresources[coordinate->Subresource].extent; + D3D12_TILE_REGION_SIZE max_size; + + if (!size || !size->UseBox) + return true; + + max_size.Width = extent->width - coordinate->X; + max_size.Height = extent->height - coordinate->Y; + max_size.Depth = extent->depth - coordinate->Z; + return size->Width <= max_size.Width && size->Height <= max_size.Height && size->Depth <= max_size.Depth; +} + +/* Initialises a region in base_coordinate and region_size, where base_coordinate is always the front + * top left. 
If src_region_size->UseBox is true, start_coordinate is also the front top left, otherwise + * it can start anywhere within the region and the region front top left is always {0, 0, 0}. */ +static bool vkd3d_initialise_tile_region(struct vkd3d_resource_tile_coordinate *base_coordinate, + D3D12_TILE_REGION_SIZE *region_size, const D3D12_TILED_RESOURCE_COORDINATE *start_coordinate, + const D3D12_TILE_REGION_SIZE *src_region_size, const struct d3d12_resource *resource) +{ + unsigned int count; + + if (!resource_validate_tiled_coordinate(resource, start_coordinate)) + { + WARN("Invalid start coordinate (%u: %u, %u, %u).\n", start_coordinate->Subresource, start_coordinate->X, + start_coordinate->Y, start_coordinate->Z); + return false; + } + if (!resource_validate_tile_region_size(resource, start_coordinate, src_region_size)) + { + WARN("Invalid region size (%u, %u, %u).\n", src_region_size->Width, src_region_size->Height, + src_region_size->Depth); + return false; + } + + if (src_region_size) + { + *region_size = *src_region_size; + } + else + { + region_size->UseBox = false; + region_size->NumTiles = 1; + } + + if (region_size->UseBox) + { + base_coordinate->x = start_coordinate->X; + base_coordinate->y = start_coordinate->Y; + base_coordinate->z = start_coordinate->Z; + /* NumTiles should be set by the caller. Validate it. 
*/ + count = d3d12_tile_region_size_compute_tile_count(region_size); + if (region_size->NumTiles != count) + WARN("NumTiles does not match the box size.\n"); + region_size->NumTiles = count; + } + else + { + memset(base_coordinate, 0, sizeof(*base_coordinate)); + d3d12_tile_region_size_set_entire_subresource(region_size, resource, start_coordinate->Subresource); + } + + return true; +} + static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, @@ -6274,6 +6411,227 @@ done: vkd3d_mutex_unlock(&command_queue->op_mutex); }
+static void deaggregate_sparse_memory_bind(VkSparseBufferMemoryBindInfo *buffer_bind_info, + const VkSparseMemoryBind *src, unsigned int tile_count, struct d3d12_resource *resource) +{ + VkSparseMemoryBind *memory_binds = resource->tiles.bind_buffer; + unsigned int i; + + for (i = 0; i < tile_count; ++i) + { + memory_binds[i].resourceOffset = src->resourceOffset + (VkDeviceSize)i * D3D12_TILE_SIZE; + memory_binds[i].size = D3D12_TILE_SIZE; + memory_binds[i].memory = src->memory; + memory_binds[i].memoryOffset = src->memoryOffset + (VkDeviceSize)i * D3D12_TILE_SIZE; + memory_binds[i].flags = src->flags; + } + + buffer_bind_info->bindCount = tile_count; + buffer_bind_info->pBinds = memory_binds; +} + +static unsigned int d3d12_command_queue_bind_sparse_block(struct d3d12_command_queue *command_queue, + struct d3d12_resource *resource, const struct vkd3d_resource_tile_coordinate *base_coordinate, + D3D12_TILED_RESOURCE_COORDINATE *coordinate, const D3D12_TILE_REGION_SIZE *region_size, + VkDeviceMemory vk_memory, unsigned int memory_offset, unsigned int memory_tile_count, bool skip_binding) +{ + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue; + unsigned int subresource = coordinate->Subresource; + VkSparseBufferMemoryBindInfo buffer_bind_info; + VkSparseMemoryBind memory_bind; + VkBindSparseInfo sparse_info; + unsigned int tiles_used; + VkResult vr; + + /* The tiled resource spec for D3D11 seems to apply to D3D12 also, and states: + * "For mipmaps that use nonstandard tiling and/or are packed, any subresource + * value that indicates any of the packed mips all refer to the same tile." */ + if (subresource % resource->desc.MipLevels >= resource->tiles.standard_mip_count) + { + /* Already bound, but the caller expects this to use the required number of tiles, + * which is 1 because we bind the mip tails on resource creation and return a + * dummy value of 1.
*/ + return 1; + } + + memset(&sparse_info, 0, sizeof(sparse_info)); + sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; + + if (d3d12_resource_is_buffer(resource)) + { + tiles_used = region_size->NumTiles; + tiles_used = min(tiles_used, memory_tile_count); + + memory_bind.resourceOffset = (VkDeviceSize)coordinate->X * D3D12_TILE_SIZE; + coordinate->X += tiles_used; + + if (skip_binding || !tiles_used) + return tiles_used; + + memory_bind.size = (VkDeviceSize)tiles_used * D3D12_TILE_SIZE; + memory_bind.memory = vk_memory; + memory_bind.memoryOffset = (VkDeviceSize)memory_offset * D3D12_TILE_SIZE; + memory_bind.flags = 0; + + buffer_bind_info.buffer = resource->u.vk_buffer; + /* A bug in NVIDIA drivers (older ones at least) requires one tile per struct to workaround. This + * could be skipped on other hardware by checking physical_device_info->properties2.properties.vendorID. */ + deaggregate_sparse_memory_bind(&buffer_bind_info, &memory_bind, tiles_used, resource); + + sparse_info.bufferBindCount = 1; + sparse_info.pBufferBinds = &buffer_bind_info; + } + else + { + vkd3d_unreachable(); + } + + sparse_info.pSignalSemaphores = &vkd3d_queue->tiled_binding_semaphore; + sparse_info.signalSemaphoreCount = 1; + + if ((vr = VK_CALL(vkQueueBindSparse(vkd3d_queue->vk_queue, 1, &sparse_info, VK_NULL_HANDLE))) < 0) + ERR("Failed to submit sparse image bind, vr %d.\n", vr); + else if ((vr = vkd3d_queue_submit_wait_acquired(vkd3d_queue, vkd3d_queue->tiled_binding_semaphore, command_queue->device)) < 0) + ERR("Failed to submit queue wait, vr %d.\n", vr); + + return tiles_used; +} + +static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue *command_queue, + struct d3d12_resource *resource, UINT region_count, + const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, + const D3D12_TILE_REGION_SIZE *region_sizes, + struct d3d12_heap *heap, + UINT range_count, + const D3D12_TILE_RANGE_FLAGS *range_flags, + const UINT *heap_range_offsets, + const UINT *range_tile_counts, + D3D12_TILE_MAPPING_FLAGS
flags) +{ + bool null_binding, aliased_binding, skip_binding, have_unsupported_aliasing; + VkDeviceMemory vk_memory = heap ? heap->vk_memory : VK_NULL_HANDLE; + unsigned int memory_offset, memory_tile_count, tiles_used; + struct vkd3d_resource_tile_coordinate base_coordinate; + D3D12_TILED_RESOURCE_COORDINATE coordinate_zero; + D3D12_TILE_REGION_SIZE region_size_default; + D3D12_TILED_RESOURCE_COORDINATE coordinate; + D3D12_TILE_REGION_SIZE region_size; + unsigned int region_idx, range_idx; + D3D12_TILE_RANGE_FLAGS cur_flags; + unsigned int tile_count_all; + VkQueue vk_queue; + + if (d3d12_resource_is_texture(resource)) + { + FIXME("Tiled textures are not implemented yet.\n"); + return; + } + + if (region_count == 1) + { + if (!region_sizes) + { + region_size_default.UseBox = false; + region_size_default.NumTiles = region_start_coordinates ? 1 : resource->tiles.total_count; + region_sizes = &region_size_default; + } + if (!region_start_coordinates) + { + memset(&coordinate_zero, 0, sizeof(coordinate_zero)); + region_start_coordinates = &coordinate_zero; + } + } + + if (range_count == 1 && !range_tile_counts) + { + tile_count_all = resource->tiles.total_count; + range_tile_counts = &tile_count_all; + } + + if (flags) + WARN("Ignoring flags %#x.\n", flags); + + memory_offset = heap_range_offsets ?
heap_range_offsets[0] : 0; + memory_tile_count = range_tile_counts[0]; + coordinate = region_start_coordinates[0]; + + if (!vkd3d_initialise_tile_region(&base_coordinate, &region_size, &coordinate, &region_sizes[0], resource)) + return; + + region_idx = 0; + range_idx = 0; + null_binding = false; + aliased_binding = false; + skip_binding = false; + have_unsupported_aliasing = false; + + if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue))) + { + ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue); + return; + } + + if (heap) + vkd3d_mutex_lock(&heap->mutex); + + do + { + if (range_flags) + { + cur_flags = range_flags[range_idx]; + null_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_NULL); + skip_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_SKIP); + aliased_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE); + if (aliased_binding && !(null_binding || skip_binding)) + { + have_unsupported_aliasing = true; + skip_binding = true; + } + if ((cur_flags &= ~(D3D12_TILE_RANGE_FLAG_NULL | D3D12_TILE_RANGE_FLAG_SKIP | D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE))) + FIXME("Ignoring flags %#x.\n", cur_flags); + } + + if (!heap_range_offsets && !null_binding) + { + WARN("Heap range offets may be NULL only if D3D12_TILE_RANGE_FLAG_NULL is used.\n"); + break; + } + + tiles_used = d3d12_command_queue_bind_sparse_block(command_queue, resource, &base_coordinate, &coordinate, + &region_size, null_binding ? VK_NULL_HANDLE : vk_memory, memory_offset, + aliased_binding ? 1 : memory_tile_count, skip_binding); + + if (!aliased_binding) + memory_offset += tiles_used; + memory_tile_count -= tiles_used; + region_size.NumTiles -= tiles_used; + + if (!memory_tile_count && ++range_idx < range_count) + { + memory_offset = heap_range_offsets ?
heap_range_offsets[range_idx] : 0; + memory_tile_count = range_tile_counts[range_idx]; + } + + if (!region_size.NumTiles && ++region_idx < region_count) + { + coordinate = region_start_coordinates[region_idx]; + if (!vkd3d_initialise_tile_region(&base_coordinate, &region_size, &coordinate, + region_sizes ? &region_sizes[region_idx] : NULL, resource)) + break; + } + } + while (region_idx < region_count && range_idx < range_count); + + if (heap) + vkd3d_mutex_unlock(&heap->mutex); + + if (have_unsupported_aliasing) + FIXME("Aliased bindings are not implemented.\n"); + + vkd3d_queue_release(command_queue->vkd3d_queue); +} + static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *dst_resource, const D3D12_TILED_RESOURCE_COORDINATE *dst_region_start_coordinate, @@ -7058,7 +7416,12 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * break;
case VKD3D_CS_OP_UPDATE_MAPPINGS: - FIXME("Tiled resource binding is not supported yet.\n"); + d3d12_command_queue_update_tile_mappings(queue, op->u.update_mappings.resource, + op->u.update_mappings.region_count, op->u.update_mappings.region_start_coordinates, + op->u.update_mappings.region_sizes, op->u.update_mappings.heap, + op->u.update_mappings.range_count, op->u.update_mappings.range_flags, + op->u.update_mappings.heap_range_offsets, op->u.update_mappings.range_tile_counts, + op->u.update_mappings.flags); vkd3d_free(op->u.update_mappings.region_start_coordinates); vkd3d_free(op->u.update_mappings.region_sizes); vkd3d_free(op->u.update_mappings.range_flags); diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 86b8cd268..bfdc67a2d 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -1192,6 +1192,8 @@ static void d3d12_resource_bind_sparse_mip_tail(struct d3d12_resource *resource, sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; sparse_info.imageOpaqueBindCount = 1; sparse_info.pImageOpaqueBinds = &opaque_bind_info; + sparse_info.pSignalSemaphores = &vkd3d_queue->tiled_binding_semaphore; + sparse_info.signalSemaphoreCount = 1;
if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) { @@ -1201,7 +1203,8 @@ static void d3d12_resource_bind_sparse_mip_tail(struct d3d12_resource *resource,
if ((vr = VK_CALL(vkQueueBindSparse(vk_queue, 1, &sparse_info, VK_NULL_HANDLE))) < 0) ERR("Failed to submit sparse image bind, vr %d.\n", vr); - /* TODO: wait on a semaphore when binding commands are implemented. */ + else if ((vr = vkd3d_queue_submit_wait_acquired(vkd3d_queue, vkd3d_queue->tiled_binding_semaphore, resource->device)) < 0) + ERR("Failed to submit queue wait, vr %d.\n", vr);
vkd3d_queue_release(vkd3d_queue); } @@ -1243,6 +1246,12 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 resource->tiles.subresource_count = 1; resource->tiles.standard_mip_count = 1; resource->tiles.packed_mip_tile_count = 0; + + if (!(resource->tiles.bind_buffer = vkd3d_malloc(resource->tiles.total_count * sizeof(VkSparseMemoryBind)))) + { + ERR("Failed to allocate binding buffer.\n"); + goto error; + } } else { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 563cb4bb9..3eab9ed06 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1483,6 +1483,8 @@ struct vkd3d_queue size_t semaphore_count;
VkSemaphore old_vk_semaphores[VKD3D_MAX_VK_SYNC_OBJECTS]; + + VkSemaphore tiled_binding_semaphore; };
VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); @@ -1490,6 +1492,8 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue); +VkResult vkd3d_queue_submit_wait_acquired(const struct vkd3d_queue *queue, VkSemaphore vk_semaphore, + struct d3d12_device *device);
enum vkd3d_cs_op { diff --git a/tests/d3d12.c b/tests/d3d12.c index 65b6c0123..e670e61d1 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -37080,7 +37080,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < 64; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo check_readback_data_uint(&rb.rb, &box, i + 1, 0); + check_readback_data_uint(&rb.rb, &box, i + 1, 0); }
release_resource_readback(&rb); @@ -37163,7 +37163,8 @@ static void test_update_tile_mappings(void) for (i = 0; i < ARRAY_SIZE(buffer_region_tiles); i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo_if(buffer_region_tiles[i]) check_readback_data_uint(&rb.rb, &box, buffer_region_tiles[i], 0); + todo_if((i >= region_offsets[0].X && i < region_offsets[0].X + region_sizes[0].NumTiles) || (i >= 24 && i <= 26)) + check_readback_data_uint(&rb.rb, &box, buffer_region_tiles[i], 0); }
release_resource_readback(&rb); @@ -37671,9 +37672,9 @@ static void test_sparse_buffer_memory_lifetime(void) transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE); get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list); i = get_readback_uint(&rb.rb, 0, 0, 0); - todo ok(i == 42, "Got #%x, expected 42.\n", i); + ok(i == 42, "Got #%x, expected 42.\n", i); i = get_readback_uint(&rb.rb, 64 * 1024 / 4, 0, 0); - todo ok(i == 42, "Got #%x, expected 42.\n", i); + ok(i == 42, "Got #%x, expected 42.\n", i); release_resource_readback(&rb);
reset_command_list(context.list, context.allocator); @@ -37690,9 +37691,9 @@ static void test_sparse_buffer_memory_lifetime(void) get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
i = get_readback_uint(&rb.rb, 2048 / 4, 0, 0); - todo ok(i == 42, "Got #%x, expected 42.\n", i); + ok(i == 42, "Got #%x, expected 42.\n", i); i = get_readback_uint(&rb.rb, 64 * 1024 / 4, 0, 0); - todo ok(i == 42, "Got #%x, expected 42.\n", i); + ok(i == 42, "Got #%x, expected 42.\n", i); release_resource_readback(&rb);
ID3D12Resource_Release(buffer);