-- v16: vkd3d: Support aliased tile binding if available. vkd3d: Implement ID3D12CommandQueue::UpdateTileMappings() for textures. vkd3d: Implement ID3D12CommandQueue::UpdateTileMappings() for buffers. vkd3d: Pre-bind sparse texture mip tails. tests/d3d12: Add a test for freeing underlying memory of a reserved resource. tests/d3d12: Add tests for UpdateTileMappings().
From: Conor McCarthy <cmccarthy@codeweavers.com>
Based on a vkd3d-proton patch by Philip Rebohle. --- tests/d3d12.c | 842 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 841 insertions(+), 1 deletion(-)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 31f57c409..b09a01aab 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -38347,7 +38347,6 @@ static void test_unused_interpolated_input(void) release_resource_readback(&rb);
ID3D12Resource_Release(buffer); - destroy_test_context(&context); }
static void test_shader_cache(void) @@ -38543,6 +38542,846 @@ static void test_shader_cache(void) destroy_test_context(&context); }
+/* Store the coordinate (x, y, z) and the subresource index into the X, Y, Z and Subresource fields of *region. */ static void set_region_offset(D3D12_TILED_RESOURCE_COORDINATE *region, uint32_t x, uint32_t y, uint32_t z, uint32_t subresource) +{ + region->X = x; + region->Y = y; + region->Z = z; + region->Subresource = subresource; +} + +/* Store num_tiles, the use_box flag and the w/h/d extents into the NumTiles, UseBox, Width, Height and Depth fields of *region. */ static void set_region_size(D3D12_TILE_REGION_SIZE *region, uint32_t num_tiles, bool use_box, uint32_t w, uint32_t h, uint32_t d) +{ + region->NumTiles = num_tiles; + region->UseBox = use_box; + region->Width = w; + region->Height = h; + region->Depth = d; +} + +static void test_update_tile_mappings(void) +{ + ID3D12Resource *resource, *resource_2, *readback_buffer; + D3D12_TILED_RESOURCE_COORDINATE region_offsets[8]; + ID3D12PipelineState *check_texture_3d_pipeline; + D3D12_ROOT_SIGNATURE_DESC root_signature_desc; + ID3D12PipelineState *clear_texture_pipeline; + ID3D12PipelineState *check_texture_pipeline; + ID3D12PipelineState *check_buffer_pipeline; + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc; + ID3D12DescriptorHeap *cpu_heap, *gpu_heap; + ID3D12RootSignature *clear_root_signature; + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc; + D3D12_DESCRIPTOR_RANGE descriptor_range; + D3D12_ROOT_PARAMETER root_parameters[2]; + D3D12_TILE_REGION_SIZE region_sizes[8]; + D3D12_GPU_VIRTUAL_ADDRESS readback_va; + D3D12_HEAP_PROPERTIES heap_properties; + D3D12_PACKED_MIP_INFO packed_mip_info; + D3D12_TILED_RESOURCES_TIER tiled_tier; + D3D12_SUBRESOURCE_TILING tilings[10]; + D3D12_TILE_RANGE_FLAGS tile_flags[8]; + ID3D12RootSignature *root_signature; + struct d3d12_resource_readback rb; + D3D12_RESOURCE_DESC resource_desc; + struct test_context_desc desc; + struct test_context context; + D3D12_TILE_SHAPE tile_shape; + unsigned int i, j, x, y, z; + D3D12_HEAP_DESC heap_desc; + UINT tile_offsets[8]; + UINT tile_counts[8]; + ID3D12Heap *heap; + UINT num_tilings; + D3D12_BOX box; + HRESULT hr; + +#if 0 + StructuredBuffer<uint> tiled_buffer : register(t0); + RWStructuredBuffer<uint> out_buffer : register(u0); + + [numthreads(64, 1, 1)] + void main(uint3 thread_id : 
SV_DispatchThreadID) + { + out_buffer[thread_id.x] = tiled_buffer[16384 * thread_id.x]; + } +#endif + static const DWORD cs_buffer_code[] = + { + 0x43425844, 0xa8625c41, 0xfd85df89, 0xcedb7945, 0x0e3444ea, 0x00000001, 0x00000108, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b4, 0x00050050, 0x0000002d, 0x0100086a, + 0x040000a2, 0x00107000, 0x00000000, 0x00000004, 0x0400009e, 0x0011e000, 0x00000000, 0x00000004, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x06000029, 0x00100012, 0x00000000, 0x0002000a, 0x00004001, 0x0000000e, 0x8b0000a7, 0x80002302, + 0x00199983, 0x00100012, 0x00000000, 0x0010000a, 0x00000000, 0x00004001, 0x00000000, 0x00107006, + 0x00000000, 0x080000a8, 0x0011e012, 0x00000000, 0x0002000a, 0x00004001, 0x00000000, 0x0010000a, + 0x00000000, 0x0100003e, + }; +#if 0 + Texture2D<uint> tiled_texture : register(t0); + RWStructuredBuffer<uint> out_buffer : register(u0); + + [numthreads(28,1,1)] + void main(uint3 thread_id : SV_DispatchThreadID) + { + uint2 tile_size = uint2(128, 128); + uint tile_index = 0; + uint tile_count = 4; + uint mip_count = 10; + uint mip_level = 0; + + while (thread_id.x >= tile_index + tile_count * tile_count && mip_level < mip_count) + { + tile_index += tile_count * tile_count; + tile_count = max(tile_count / 2, 1); + mip_level += 1; + } + + uint2 tile_coord; + tile_coord.x = (thread_id.x - tile_index) % tile_count; + tile_coord.y = (thread_id.x - tile_index) / tile_count; + + out_buffer[thread_id.x] = tiled_texture.mips[mip_level][tile_coord * tile_size]; + } +#endif + static const DWORD cs_texture_code[] = + { + 0x43425844, 0x03e118db, 0xda7deb90, 0xedb39031, 0x6b646a0b, 0x00000001, 0x00000288, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 
0x00000234, 0x00050050, 0x0000008d, 0x0100086a, + 0x04001858, 0x00107000, 0x00000000, 0x00004444, 0x0400009e, 0x0011e000, 0x00000000, 0x00000004, + 0x0200005f, 0x00020012, 0x02000068, 0x00000003, 0x0400009b, 0x0000001c, 0x00000001, 0x00000001, + 0x08000036, 0x00100072, 0x00000000, 0x00004002, 0x00000000, 0x00000004, 0x00000000, 0x00000000, + 0x01000030, 0x09000023, 0x00100082, 0x00000000, 0x0010001a, 0x00000000, 0x0010001a, 0x00000000, + 0x0010000a, 0x00000000, 0x06000050, 0x00100012, 0x00000001, 0x0002000a, 0x0010003a, 0x00000000, + 0x0700004f, 0x00100022, 0x00000001, 0x0010002a, 0x00000000, 0x00004001, 0x0000000a, 0x07000001, + 0x00100012, 0x00000001, 0x0010001a, 0x00000001, 0x0010000a, 0x00000001, 0x03000003, 0x0010000a, + 0x00000001, 0x07000055, 0x00100012, 0x00000001, 0x0010001a, 0x00000000, 0x00004001, 0x00000001, + 0x07000053, 0x00100022, 0x00000000, 0x0010000a, 0x00000001, 0x00004001, 0x00000001, 0x0700001e, + 0x00100042, 0x00000000, 0x0010002a, 0x00000000, 0x00004001, 0x00000001, 0x05000036, 0x00100012, + 0x00000000, 0x0010003a, 0x00000000, 0x01000016, 0x05000036, 0x001000c2, 0x00000001, 0x00100aa6, + 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x8010000a, 0x00000041, 0x00000000, 0x0002000a, + 0x0900004e, 0x00100012, 0x00000000, 0x00100012, 0x00000002, 0x0010000a, 0x00000000, 0x0010001a, + 0x00000000, 0x05000036, 0x00100022, 0x00000002, 0x0010000a, 0x00000000, 0x0a000029, 0x00100032, + 0x00000001, 0x00100046, 0x00000002, 0x00004002, 0x00000007, 0x00000007, 0x00000000, 0x00000000, + 0x8900002d, 0x800000c2, 0x00111103, 0x00100012, 0x00000000, 0x00100e46, 0x00000001, 0x00107e46, + 0x00000000, 0x080000a8, 0x0011e012, 0x00000000, 0x0002000a, 0x00004001, 0x00000000, 0x0010000a, + 0x00000000, 0x0100003e, + }; + +#if 0 + Texture3D<uint> tiled_texture : register(t0); + RWStructuredBuffer<uint> out_buffer : register(u0); + + [numthreads(9,1,1)] + void main(uint3 thread_id : SV_DispatchThreadID) + { + uint3 tile_size = uint3(32, 32, 16); + uint tile_index = 0; + 
uint tile_count = 2; + uint mip_count = 2; + uint mip_level = 0; + + while (thread_id.x >= tile_index + tile_count * tile_count * tile_count && mip_level < mip_count) + { + tile_index += tile_count * tile_count * tile_count; + tile_count = max(tile_count / 2, 1); + mip_level += 1; + } + + uint3 tile_coord; + tile_coord.x = (thread_id.x - tile_index) % tile_count; + tile_coord.y = ((thread_id.x - tile_index) / tile_count) % tile_count; + tile_coord.z = (thread_id.x - tile_index) / (tile_count * tile_count); + + out_buffer[thread_id.x] = tiled_texture.mips[mip_level][tile_coord * tile_size]; + } +#endif + static const DWORD cs_texture_3d_code[] = + { + 0x43425844, 0x71b4eb36, 0x2c65e68d, 0x7763693f, 0xfd4eafc6, 0x00000001, 0x000002f4, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000002a0, 0x00050050, 0x000000a8, 0x0100086a, + 0x04002858, 0x00107000, 0x00000000, 0x00004444, 0x0400009e, 0x0011e000, 0x00000000, 0x00000004, + 0x0200005f, 0x00020012, 0x02000068, 0x00000004, 0x0400009b, 0x00000009, 0x00000001, 0x00000001, + 0x08000036, 0x00100032, 0x00000000, 0x00004002, 0x00000000, 0x00000002, 0x00000000, 0x00000000, + 0x05000036, 0x00100082, 0x00000001, 0x00004001, 0x00000000, 0x01000030, 0x08000026, 0x0000d000, + 0x00100042, 0x00000000, 0x0010001a, 0x00000000, 0x0010001a, 0x00000000, 0x09000023, 0x00100042, + 0x00000000, 0x0010002a, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x06000050, + 0x00100082, 0x00000000, 0x0002000a, 0x0010002a, 0x00000000, 0x0700004f, 0x00100012, 0x00000002, + 0x0010003a, 0x00000001, 0x00004001, 0x00000002, 0x07000001, 0x00100082, 0x00000000, 0x0010003a, + 0x00000000, 0x0010000a, 0x00000002, 0x03000003, 0x0010003a, 0x00000000, 0x07000055, 0x00100082, + 0x00000000, 0x0010001a, 0x00000000, 0x00004001, 0x00000001, 0x07000053, 0x00100022, 0x00000000, + 0x0010003a, 0x00000000, 0x00004001, 0x00000001, 0x0700001e, 
0x00100082, 0x00000001, 0x0010003a, + 0x00000001, 0x00004001, 0x00000001, 0x05000036, 0x00100012, 0x00000000, 0x0010002a, 0x00000000, + 0x01000016, 0x0700001e, 0x00100012, 0x00000000, 0x8010000a, 0x00000041, 0x00000000, 0x0002000a, + 0x0900004e, 0x00100012, 0x00000002, 0x00100012, 0x00000003, 0x0010000a, 0x00000000, 0x0010001a, + 0x00000000, 0x0800004e, 0x0000d000, 0x00100022, 0x00000003, 0x0010000a, 0x00000002, 0x0010001a, + 0x00000000, 0x08000026, 0x0000d000, 0x00100022, 0x00000000, 0x0010001a, 0x00000000, 0x0010001a, + 0x00000000, 0x0800004e, 0x00100042, 0x00000003, 0x0000d000, 0x0010000a, 0x00000000, 0x0010001a, + 0x00000000, 0x0a000029, 0x00100072, 0x00000001, 0x00100246, 0x00000003, 0x00004002, 0x00000005, + 0x00000005, 0x00000004, 0x00000000, 0x8900002d, 0x80000142, 0x00111103, 0x00100012, 0x00000000, + 0x00100e46, 0x00000001, 0x00107e46, 0x00000000, 0x080000a8, 0x0011e012, 0x00000000, 0x0002000a, + 0x00004001, 0x00000000, 0x0010000a, 0x00000000, 0x0100003e, + }; + +#if 0 + RWTexture3D<uint> uav : register(u0); + + cbuffer clear_args + { + uint3 offset; + uint value; + }; + + [numthreads(4, 4, 4)] + void main(uint3 coord : SV_DispatchThreadID) + { + uav[offset + coord] = value; + } +#endif + static const DWORD cs_clear_code[] = + { + 0x43425844, 0x288d0bcd, 0xbe5e644d, 0x95665c2e, 0xd8f02c36, 0x00000001, 0x000000e0, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x0000008c, 0x00050050, 0x00000023, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x0400289c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000004, 0x00000004, 0x00000004, + 0x0700001e, 0x001000f2, 0x00000000, 0x00020a46, 0x00208a46, 0x00000000, 0x00000000, 0x080000a4, + 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208ff6, 0x00000000, 0x00000000, 0x0100003e, + }; + + static const D3D12_SHADER_BYTECODE cs_texture 
= { cs_texture_code, sizeof(cs_texture_code) }; + static const D3D12_SHADER_BYTECODE cs_texture_3d = { cs_texture_3d_code, sizeof(cs_texture_3d_code) }; + static const D3D12_SHADER_BYTECODE cs_buffer = { cs_buffer_code, sizeof(cs_buffer_code) }; + static const D3D12_SHADER_BYTECODE cs_clear = { cs_clear_code, sizeof(cs_clear_code) }; + + static const uint32_t buffer_region_tiles[] = + { + /* 0 1 2 3 4 5 6 7 8 9 */ + /*0*/ 33, 34, 35, 36, 37, 6, 7, 8, 9, 10, + /*1*/ 11, 12, 38, 39, 40, 41, 1, 18, 2, 20, + /*2*/ 21, 22, 23, 3, 4, 4, 4, 0, 0, 25, + /*3*/ 26, 27, 28, 29, 30, 36, 37, 38, 39, 40, + /*4*/ 9, 11, 43, 44, 45, 46, 45, 46, 49, 50, + /*5*/ 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, + /*6*/ 61, 62, 63, 12, + }; + + static const uint32_t texture_region_tiles[] = + { + 1, 2, 4, 5, 6, 7, 1, 1, 9, 1, 17, 14, 8, 14, 3, 0, + 18, 18, 19, 18, 19, 22, 23, 24, 25, 26, 27, 28, + }; + + static const uint32_t texture_3d_region_tiles[] = + { + 3, 2, 0, 7, 8, 2, 4, 5, 6, + }; + + if (test_options.use_warp_device) + { + /* Internal error in the 2D complex mapping test, and mysterious references left on the device at the end. 
*/ + skip("Broken on WARP.\n"); + return; + } + + memset(&desc, 0, sizeof(desc)); + desc.rt_width = 640; + desc.rt_height = 480; + desc.rt_format = DXGI_FORMAT_R8G8B8A8_UNORM; + if (!init_test_context(&context, &desc)) + return; + + if ((tiled_tier = get_tiled_resources_tier(context.device)) < D3D12_TILED_RESOURCES_TIER_1) + { + skip("Tiled resources not supported by device.\n"); + destroy_test_context(&context); + return; + } + + descriptor_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + descriptor_range.NumDescriptors = 1; + descriptor_range.BaseShaderRegister = 0; + descriptor_range.RegisterSpace = 0; + descriptor_range.OffsetInDescriptorsFromTableStart = 0; + root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_parameters[0].DescriptorTable.NumDescriptorRanges = 1; + root_parameters[0].DescriptorTable.pDescriptorRanges = &descriptor_range; + root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + root_parameters[1].Descriptor.ShaderRegister = 0; + root_parameters[1].Descriptor.RegisterSpace = 0; + root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + root_signature_desc.NumParameters = ARRAY_SIZE(root_parameters); + root_signature_desc.pParameters = root_parameters; + root_signature_desc.NumStaticSamplers = 0; + root_signature_desc.pStaticSamplers = NULL; + root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + hr = create_root_signature(context.device, &root_signature_desc, &root_signature); + ok(hr == S_OK, "Failed to create root signature, hr %#x.\n", hr); + + descriptor_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + root_parameters[1].Constants.ShaderRegister = 0; + root_parameters[1].Constants.RegisterSpace = 0; + root_parameters[1].Constants.Num32BitValues = 4; + root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + 
hr = create_root_signature(context.device, &root_signature_desc, &clear_root_signature); + ok(hr == S_OK, "Failed to create root signature, hr %#x.\n", hr); + + clear_texture_pipeline = create_compute_pipeline_state(context.device, clear_root_signature, cs_clear); + check_texture_pipeline = create_compute_pipeline_state(context.device, root_signature, cs_texture); + check_texture_3d_pipeline = create_compute_pipeline_state(context.device, root_signature, cs_texture_3d); + check_buffer_pipeline = create_compute_pipeline_state(context.device, root_signature, cs_buffer); + + cpu_heap = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 11); + gpu_heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 11); + + ok(clear_texture_pipeline, "Failed to create clear_texture_pipeline.\n"); + ok(check_texture_pipeline, "Failed to create check_texture_pipeline.\n"); + ok(check_texture_3d_pipeline, "Failed to create check_texture_3d_pipeline.\n"); + ok(check_buffer_pipeline, "Failed to create check_buffer_pipeline.\n"); + ok(cpu_heap, "Failed to create cpu_heap.\n"); + ok(gpu_heap, "Failed to create gpu_heap.\n"); + + memset(&heap_properties, 0, sizeof(heap_properties)); + heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; + + resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resource_desc.Alignment = 0; + resource_desc.Width = 64 * sizeof(uint32_t); + resource_desc.Height = 1; + resource_desc.DepthOrArraySize = 1; + resource_desc.MipLevels = 1; + resource_desc.Format = DXGI_FORMAT_UNKNOWN; + resource_desc.SampleDesc.Count = 1; + resource_desc.SampleDesc.Quality = 0; + resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + hr = ID3D12Device_CreateCommittedResource(context.device, &heap_properties, D3D12_HEAP_FLAG_NONE, + &resource_desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, &IID_ID3D12Resource, (void **)&readback_buffer); + ok(hr 
== S_OK, "Failed to create readback buffer, hr %#x.\n", hr); + + readback_va = ID3D12Resource_GetGPUVirtualAddress(readback_buffer); + + /* Test buffer tile mappings */ + heap_desc.Properties = heap_properties; + heap_desc.Alignment = 0; + heap_desc.SizeInBytes = 64 * 65536; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void **)&heap); + ok(hr == S_OK, "Failed to create heap, hr %#x.\n", hr); + + resource_desc.Width = 64 * 65536; + hr = ID3D12Device_CreateReservedResource(context.device, &resource_desc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, &IID_ID3D12Resource, (void **)&resource); + ok(hr == S_OK, "Failed to create reserved buffer, hr %#x.\n", hr); + + srv_desc.Format = DXGI_FORMAT_UNKNOWN; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Buffer.FirstElement = 0; + srv_desc.Buffer.NumElements = resource_desc.Width / sizeof(uint32_t); + srv_desc.Buffer.StructureByteStride = sizeof(uint32_t); + srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + ID3D12Device_CreateShaderResourceView(context.device, resource, &srv_desc, get_cpu_descriptor_handle(&context, gpu_heap, 0)); + + uav_desc.Format = DXGI_FORMAT_R32_UINT; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uav_desc.Buffer.FirstElement = 0; + uav_desc.Buffer.NumElements = resource_desc.Width / sizeof(uint32_t); + uav_desc.Buffer.StructureByteStride = 0; + uav_desc.Buffer.CounterOffsetInBytes = 0; + uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, get_cpu_descriptor_handle(&context, cpu_heap, 1)); + ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, get_cpu_descriptor_handle(&context, gpu_heap, 1)); + + /* Map entire buffer, linearly, and initialize tile data */ + tile_offsets[0] = 0; + 
ID3D12CommandQueue_UpdateTileMappings(context.queue, resource, 1, NULL, NULL, + heap, 1, NULL, tile_offsets, NULL, D3D12_TILE_MAPPING_FLAG_NONE); + + for (i = 0; i < 64; i++) + { + UINT clear_value[4] = { 0, 0, 0, 0 }; + D3D12_RECT clear_rect; + + set_rect(&clear_rect, 16384 * i, 0, 16384 * (i + 1), 1); + clear_value[0] = i + 1; + + ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(context.list, + get_gpu_descriptor_handle(&context, gpu_heap, 1), + get_cpu_descriptor_handle(&context, cpu_heap, 1), + resource, clear_value, 1, &clear_rect); + } + + transition_resource_state(context.list, resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu_heap); + ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature); + ID3D12GraphicsCommandList_SetPipelineState(context.list, check_buffer_pipeline); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, get_gpu_descriptor_handle(&context, gpu_heap, 0)); + ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1, readback_va); + ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1); + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + + get_buffer_readback_with_command_list(readback_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list); + + for (i = 0; i < 64; i++) + { + set_box(&box, i, 0, 0, i + 1, 1, 1); + todo check_readback_data_uint(&rb.rb, &box, i + 1, 0); + } + + release_resource_readback(&rb); + + /* Test arbitrary tile mappings */ + set_region_offset(&region_offsets[0], 16, 0, 0, 0); + set_region_offset(&region_offsets[1], 18, 0, 0, 0); + set_region_offset(&region_offsets[2], 23, 0, 0, 0); + set_region_offset(&region_offsets[3], 40, 0, 0, 0); + set_region_offset(&region_offsets[4], 41, 0, 0, 0); + set_region_offset(&region_offsets[5], 63, 0, 0, 0); + + 
tile_offsets[0] = 0; + tile_offsets[1] = 8; + tile_offsets[2] = 10; + + tile_counts[0] = 3; + tile_counts[1] = 1; + tile_counts[2] = 2; + + ID3D12CommandQueue_UpdateTileMappings(context.queue, resource, 6, region_offsets, NULL, + heap, 3, NULL, tile_offsets, tile_counts, D3D12_TILE_MAPPING_FLAG_NONE); + + set_region_offset(&region_offsets[0], 24, 0, 0, 0); + set_region_offset(&region_offsets[1], 50, 0, 0, 0); + set_region_offset(&region_offsets[2], 0, 0, 0, 0); + set_region_offset(&region_offsets[3], 52, 0, 0, 0); + set_region_offset(&region_offsets[4], 29, 0, 0, 0); + + set_region_size(&region_sizes[0], 5, false, 0, 0, 0); + set_region_size(&region_sizes[1], 2, false, 0, 0, 0); + set_region_size(&region_sizes[2], 16, false, 0, 0, 0); + set_region_size(&region_sizes[3], 8, false, 0, 0, 0); + set_region_size(&region_sizes[4], 6, false, 0, 0, 0); + + tile_flags[0] = D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE; + tile_flags[1] = D3D12_TILE_RANGE_FLAG_NULL; + tile_flags[2] = D3D12_TILE_RANGE_FLAG_NONE; + tile_flags[3] = D3D12_TILE_RANGE_FLAG_SKIP; + tile_flags[4] = D3D12_TILE_RANGE_FLAG_NONE; + tile_flags[5] = D3D12_TILE_RANGE_FLAG_NONE; + + tile_offsets[0] = 3; + tile_offsets[1] = 0; + tile_offsets[2] = 32; + tile_offsets[3] = 0; + tile_offsets[4] = 37; + tile_offsets[5] = 16; + + tile_counts[0] = 3; + tile_counts[1] = 4; + tile_counts[2] = 5; + tile_counts[3] = 7; + tile_counts[4] = 4; + tile_counts[5] = 14; + + ID3D12CommandQueue_UpdateTileMappings(context.queue, resource, 5, region_offsets, region_sizes, + heap, 6, tile_flags, tile_offsets, tile_counts, D3D12_TILE_MAPPING_FLAG_NONE); + + set_region_offset(&region_offsets[0], 46, 0, 0, 0); + set_region_offset(&region_offsets[1], 44, 0, 0, 0); + set_region_size(&region_sizes[0], 2, false, 0, 0, 0); + + ID3D12CommandQueue_CopyTileMappings(context.queue, resource, &region_offsets[0], resource, + &region_offsets[1], &region_sizes[0], D3D12_TILE_MAPPING_FLAG_NONE); + + reset_command_list(context.list, context.allocator); + + transition_resource_state(context.list, 
readback_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu_heap); + ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature); + ID3D12GraphicsCommandList_SetPipelineState(context.list, check_buffer_pipeline); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, get_gpu_descriptor_handle(&context, gpu_heap, 0)); + ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1, readback_va); + ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1); + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + + get_buffer_readback_with_command_list(readback_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list); + + for (i = 0; i < ARRAY_SIZE(buffer_region_tiles); i++) + { + set_box(&box, i, 0, 0, i + 1, 1, 1); + todo_if(buffer_region_tiles[i]) check_readback_data_uint(&rb.rb, &box, buffer_region_tiles[i], 0); + } + + release_resource_readback(&rb); + + ID3D12Resource_Release(resource); + ID3D12Heap_Release(heap); + + /* Test 2D image tile mappings */ + heap_desc.Properties = heap_properties; + heap_desc.Alignment = 0; + heap_desc.SizeInBytes = 64 * 65536; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; + hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void **)&heap); + ok(hr == S_OK, "Failed to create heap, hr %#x.\n", hr); + + resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resource_desc.Alignment = 0; + resource_desc.Width = 512; + resource_desc.Height = 512; + resource_desc.DepthOrArraySize = 1; + resource_desc.MipLevels = 10; + resource_desc.Format = DXGI_FORMAT_R32_UINT; + resource_desc.SampleDesc.Count = 1; + resource_desc.SampleDesc.Quality = 0; + resource_desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; + resource_desc.Flags = 
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + hr = ID3D12Device_CreateReservedResource(context.device, &resource_desc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, &IID_ID3D12Resource, (void **)&resource); + ok(hr == S_OK, "Failed to create reserved texture, hr %#x.\n", hr); + hr = ID3D12Device_CreateReservedResource(context.device, &resource_desc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, &IID_ID3D12Resource, (void **)&resource_2); + ok(hr == S_OK, "Failed to create reserved texture, hr %#x.\n", hr); + + num_tilings = resource_desc.MipLevels; + ID3D12Device_GetResourceTiling(context.device, resource, NULL, &packed_mip_info, &tile_shape, &num_tilings, 0, tilings); + ok(packed_mip_info.NumStandardMips >= 3, "Unexpected number of standard mips %u.\n", packed_mip_info.NumStandardMips); + + srv_desc.Format = DXGI_FORMAT_R32_UINT; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Texture2D.MostDetailedMip = 0; + srv_desc.Texture2D.MipLevels = resource_desc.MipLevels; + srv_desc.Texture2D.PlaneSlice = 0; + srv_desc.Texture2D.ResourceMinLODClamp = 0.0f; + ID3D12Device_CreateShaderResourceView(context.device, resource, &srv_desc, get_cpu_descriptor_handle(&context, gpu_heap, 0)); + + /* Map entire image */ + tile_offsets[0] = 0; + ID3D12CommandQueue_UpdateTileMappings(context.queue, resource, + 1, NULL, NULL, heap, 1, NULL, tile_offsets, NULL, D3D12_TILE_MAPPING_FLAG_NONE); + + reset_command_list(context.list, context.allocator); + + for (i = 0, j = 0; i < resource_desc.MipLevels; i++) + { + uav_desc.Format = DXGI_FORMAT_R32_UINT; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uav_desc.Texture2D.MipSlice = i; + uav_desc.Texture2D.PlaneSlice = 0; + ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, get_cpu_descriptor_handle(&context, cpu_heap, 1 + i)); + ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, 
&uav_desc, get_cpu_descriptor_handle(&context, gpu_heap, 1 + i)); + + for (y = 0; y < max(1u, tilings[i].HeightInTiles); y++) + { + for (x = 0; x < max(1u, tilings[i].WidthInTiles); x++) + { + UINT clear_value[4] = { 0, 0, 0, 0 }; + D3D12_RECT clear_rect; + + clear_value[0] = ++j; + set_rect(&clear_rect, x * tile_shape.WidthInTexels, y * tile_shape.HeightInTexels, + min(resource_desc.Width >> i, (x + 1) * tile_shape.WidthInTexels), + min(resource_desc.Height >> i, (y + 1) * tile_shape.HeightInTexels)); + + ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(context.list, + get_gpu_descriptor_handle(&context, gpu_heap, 1 + i), + get_cpu_descriptor_handle(&context, cpu_heap, 1 + i), + resource, clear_value, 1, &clear_rect); + } + } + } + + transition_resource_state(context.list, resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu_heap); + ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature); + ID3D12GraphicsCommandList_SetPipelineState(context.list, check_texture_pipeline); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, get_gpu_descriptor_handle(&context, gpu_heap, 0)); + ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1, readback_va); + ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1); + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + + get_buffer_readback_with_command_list(readback_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list); + + for (i = 0; i < j; i++) + { + set_box(&box, i, 0, 0, i + 1, 1, 1); + todo check_readback_data_uint(&rb.rb, &box, i + 1, 0); + } + + release_resource_readback(&rb); + + 
set_region_offset(&region_offsets[0], 2, 0, 0, 0); + set_region_offset(&region_offsets[1], 1, 1, 0, 0); + set_region_offset(&region_offsets[2], 1, 1, 0, 1); + set_region_offset(&region_offsets[3], 0, 3, 0, 0); + set_region_offset(&region_offsets[4], 0, 0, 0, packed_mip_info.NumStandardMips); + + set_region_size(&region_sizes[0], 3, false, 0, 0, 0); + set_region_size(&region_sizes[1], 4, true, 2, 2, 1); + set_region_size(&region_sizes[2], 2, false, 0, 0, 0); + set_region_size(&region_sizes[3], 4, true, 4, 1, 1); + set_region_size(&region_sizes[4], packed_mip_info.NumTilesForPackedMips, false, 0, 0, 0); + + tile_flags[0] = D3D12_TILE_RANGE_FLAG_NONE; + tile_flags[1] = D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE; + tile_flags[2] = D3D12_TILE_RANGE_FLAG_NONE; + tile_flags[3] = D3D12_TILE_RANGE_FLAG_NONE; + tile_flags[4] = D3D12_TILE_RANGE_FLAG_SKIP; + tile_flags[5] = D3D12_TILE_RANGE_FLAG_NONE; + tile_flags[6] = D3D12_TILE_RANGE_FLAG_NULL; + tile_flags[7] = D3D12_TILE_RANGE_FLAG_NONE; + + tile_offsets[0] = 3; + tile_offsets[1] = 0; + tile_offsets[2] = 16; + tile_offsets[3] = 7; + tile_offsets[4] = 0; + tile_offsets[5] = 2; + tile_offsets[6] = 0; + tile_offsets[7] = 0; + + tile_counts[0] = 4; + tile_counts[1] = 2; + tile_counts[2] = 3; + tile_counts[3] = 1; + tile_counts[4] = 1; + tile_counts[5] = 1; + tile_counts[6] = 1; + tile_counts[7] = packed_mip_info.NumTilesForPackedMips; + + ID3D12CommandQueue_UpdateTileMappings(context.queue, resource, 5, region_offsets, region_sizes, + heap, 8, tile_flags, tile_offsets, tile_counts, D3D12_TILE_MAPPING_FLAG_NONE); + + tile_offsets[7] = packed_mip_info.StartTileIndexInOverallResource; + ID3D12CommandQueue_UpdateTileMappings(context.queue, resource_2, 1, &region_offsets[4], &region_sizes[4], + heap, 1, tile_flags, &tile_offsets[7], &tile_counts[7], D3D12_TILE_MAPPING_FLAG_NONE); + + set_region_offset(&region_offsets[0], 3, 1, 0, 0); + set_region_offset(&region_offsets[1], 1, 2, 0, 0); + set_region_size(&region_sizes[0], 2, true, 1, 2, 1); + + 
ID3D12CommandQueue_CopyTileMappings(context.queue, resource, &region_offsets[0], + resource, &region_offsets[1], &region_sizes[0], D3D12_TILE_MAPPING_FLAG_NONE); + + set_region_offset(&region_offsets[0], 0, 0, 0, packed_mip_info.NumStandardMips); + region_offsets[1] = region_offsets[0]; + set_region_size(&region_sizes[0], packed_mip_info.NumTilesForPackedMips, false, 0, 0, 0); + + ID3D12CommandQueue_CopyTileMappings(context.queue, resource, &region_offsets[0], + resource_2, &region_offsets[1], &region_sizes[0], D3D12_TILE_MAPPING_FLAG_NONE); + + set_region_offset(&region_offsets[0], 0, 0, 0, 1); + set_region_size(&region_sizes[0], 4, true, 2, 2, 1); + ID3D12CommandQueue_CopyTileMappings(context.queue, resource_2, &region_offsets[0], + resource, &region_offsets[0], &region_sizes[0], D3D12_TILE_MAPPING_FLAG_NONE); + + set_region_offset(&region_offsets[1], 1, 1, 0, 1); + set_region_offset(&region_offsets[2], 0, 0, 0, 1); + set_region_size(&region_sizes[1], 1, false, 0, 0, 0); + ID3D12CommandQueue_CopyTileMappings(context.queue, resource_2, &region_offsets[2], + resource_2, &region_offsets[1], &region_sizes[1], D3D12_TILE_MAPPING_FLAG_NONE); + + set_region_size(&region_sizes[0], 4, false, 0, 0, 0); + ID3D12CommandQueue_CopyTileMappings(context.queue, resource, &region_offsets[0], + resource_2, &region_offsets[0], &region_sizes[0], D3D12_TILE_MAPPING_FLAG_NONE); + + reset_command_list(context.list, context.allocator); + + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu_heap); + ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature); + ID3D12GraphicsCommandList_SetPipelineState(context.list, check_texture_pipeline); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, get_gpu_descriptor_handle(&context, gpu_heap, 0)); + ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1, readback_va); + 
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1); + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + + get_buffer_readback_with_command_list(readback_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list); + + for (i = 0; i < j; i++) + { + set_box(&box, i, 0, 0, i + 1, 1, 1); + todo_if(texture_region_tiles[i]) + check_readback_data_uint(&rb.rb, &box, texture_region_tiles[i], 0); + } + + release_resource_readback(&rb); + + ID3D12Resource_Release(resource); + ID3D12Resource_Release(resource_2); + + if (tiled_tier >= D3D12_TILED_RESOURCES_TIER_3) + { + /* Test 3D image tile mappings */ + resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; + resource_desc.Alignment = 0; + resource_desc.Width = 64; + resource_desc.Height = 64; + resource_desc.DepthOrArraySize = 32; + resource_desc.MipLevels = 2; + resource_desc.Format = DXGI_FORMAT_R32_UINT; + resource_desc.SampleDesc.Count = 1; + resource_desc.SampleDesc.Quality = 0; + resource_desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; + resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + hr = ID3D12Device_CreateReservedResource(context.device, &resource_desc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, &IID_ID3D12Resource, (void **)&resource); + ok(hr == S_OK, "Failed to create reserved texture, hr %#x.\n", hr); + + num_tilings = resource_desc.MipLevels; + ID3D12Device_GetResourceTiling(context.device, resource, NULL, &packed_mip_info, &tile_shape, &num_tilings, 0, tilings); + ok(packed_mip_info.NumStandardMips == 2, "Unexpected number of standard mips %u.\n", packed_mip_info.NumStandardMips); + + srv_desc.Format = DXGI_FORMAT_R32_UINT; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Texture3D.MostDetailedMip = 0; + srv_desc.Texture3D.MipLevels = resource_desc.MipLevels; + 
srv_desc.Texture3D.ResourceMinLODClamp = 0.0f; + ID3D12Device_CreateShaderResourceView(context.device, resource, &srv_desc, get_cpu_descriptor_handle(&context, gpu_heap, 0)); + + /* Map entire image */ + tile_offsets[0] = 0; + ID3D12CommandQueue_UpdateTileMappings(context.queue, resource, + 1, NULL, NULL, heap, 1, NULL, tile_offsets, NULL, D3D12_TILE_MAPPING_FLAG_NONE); + + reset_command_list(context.list, context.allocator); + + for (i = 0, j = 0; i < resource_desc.MipLevels; i++) + { + uav_desc.Format = DXGI_FORMAT_R32_UINT; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + uav_desc.Texture3D.MipSlice = i; + uav_desc.Texture3D.FirstWSlice = 0; + uav_desc.Texture3D.WSize = resource_desc.DepthOrArraySize >> i; + ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, get_cpu_descriptor_handle(&context, cpu_heap, 1 + i)); + ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, get_cpu_descriptor_handle(&context, gpu_heap, 1 + i)); + + /* ClearUnorderedAccessView only takes 2D coordinates so we have to + * bring our own shader to initialize portions of a 3D image */ + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu_heap); + ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, clear_root_signature); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, get_gpu_descriptor_handle(&context, gpu_heap, 1 + i)); + ID3D12GraphicsCommandList_SetPipelineState(context.list, clear_texture_pipeline); + + for (z = 0; z < max(1u, tilings[i].DepthInTiles); z++) + { + for (y = 0; y < max(1u, tilings[i].HeightInTiles); y++) + { + for (x = 0; x < max(1u, tilings[i].WidthInTiles); x++) + { + UINT shader_args[4]; + shader_args[0] = tile_shape.WidthInTexels * x; + shader_args[1] = tile_shape.HeightInTexels * y; + shader_args[2] = tile_shape.DepthInTexels * z; + shader_args[3] = ++j; + + ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(context.list, + 1, 
ARRAY_SIZE(shader_args), shader_args, 0); + ID3D12GraphicsCommandList_Dispatch(context.list, tile_shape.WidthInTexels / 4, + tile_shape.HeightInTexels / 4, tile_shape.DepthInTexels / 4); + } + } + } + } + + transition_resource_state(context.list, resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu_heap); + ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature); + ID3D12GraphicsCommandList_SetPipelineState(context.list, check_texture_3d_pipeline); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, get_gpu_descriptor_handle(&context, gpu_heap, 0)); + ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1, readback_va); + ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1); + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + + get_buffer_readback_with_command_list(readback_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list); + + for (i = 0; i < j; i++) + { + set_box(&box, i, 0, 0, i + 1, 1, 1); + check_readback_data_uint(&rb.rb, &box, i + 1, 0); + } + + release_resource_readback(&rb); + + set_region_offset(®ion_offsets[0], 0, 0, 0, 0); + set_region_offset(®ion_offsets[1], 0, 1, 1, 0); + set_region_offset(®ion_offsets[2], 1, 1, 0, 0); + set_region_offset(®ion_offsets[3], 1, 0, 0, 0); + set_region_offset(®ion_offsets[4], 0, 1, 0, 0); + + set_region_size(®ion_sizes[0], 1, false, 0, 0, 0); + set_region_size(®ion_sizes[1], 3, false, 0, 0, 0); + set_region_size(®ion_sizes[2], 2, false, 0, 0, 0); + set_region_size(®ion_sizes[3], 2, true, 1, 1, 2); + set_region_size(®ion_sizes[4], 1, true, 1, 1, 1); + + tile_flags[0] = D3D12_TILE_RANGE_FLAG_NONE; + tile_flags[1] = 
D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE; + tile_flags[2] = D3D12_TILE_RANGE_FLAG_NULL; + + tile_offsets[0] = 2; + tile_offsets[1] = 1; + tile_offsets[2] = 0; + + tile_counts[0] = 6; + tile_counts[1] = 2; + tile_counts[2] = 1; + + ID3D12CommandQueue_UpdateTileMappings(context.queue, resource, 5, region_offsets, region_sizes, + heap, 3, tile_flags, tile_offsets, tile_counts, D3D12_TILE_MAPPING_FLAG_NONE); + + reset_command_list(context.list, context.allocator); + + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu_heap); + ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature); + ID3D12GraphicsCommandList_SetPipelineState(context.list, check_texture_3d_pipeline); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, get_gpu_descriptor_handle(&context, gpu_heap, 0)); + ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1, readback_va); + ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1); + transition_resource_state(context.list, readback_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + + get_buffer_readback_with_command_list(readback_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list); + + for (i = 0; i < j; i++) + { + set_box(&box, i, 0, 0, i + 1, 1, 1); + check_readback_data_uint(&rb.rb, &box, texture_3d_region_tiles[i], 0); + } + + release_resource_readback(&rb); + ID3D12Resource_Release(resource); + } + else + { + skip("Tiles resources tier 3 not supported.\n"); + } + + ID3D12Heap_Release(heap); + + ID3D12DescriptorHeap_Release(gpu_heap); + ID3D12DescriptorHeap_Release(cpu_heap); + ID3D12Resource_Release(readback_buffer); + ID3D12PipelineState_Release(clear_texture_pipeline); + ID3D12PipelineState_Release(check_texture_3d_pipeline); + ID3D12PipelineState_Release(check_texture_pipeline); + 
ID3D12PipelineState_Release(check_buffer_pipeline); + ID3D12RootSignature_Release(clear_root_signature); + ID3D12RootSignature_Release(root_signature); + destroy_test_context(&context); +} + START_TEST(d3d12) { parse_args(argc, argv); @@ -38728,4 +39567,5 @@ START_TEST(d3d12) run_test(test_hull_shader_punned_array); run_test(test_unused_interpolated_input); run_test(test_shader_cache); + run_test(test_update_tile_mappings); }
From: Conor McCarthy <cmccarthy@codeweavers.com>
As long as the reserved regions are not used, this is okay.
Based on a vkd3d-proton patch by Hans-Kristian Arntzen. --- tests/d3d12.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index b09a01aab..fc2e176c3 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -39382,6 +39382,183 @@ static void test_update_tile_mappings(void) destroy_test_context(&context); }
+static void test_sparse_buffer_memory_lifetime(void)
+{
+    /* Bind tiles of a reserved buffer to two heaps, release one heap while the buffer stays alive,
+     * then keep using the buffer. This may confuse drivers which track buffer object lifetimes. */
+    D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
+    D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
+    const UINT values[] = { 42, 42, 42, 42 };
+    D3D12_ROOT_PARAMETER root_parameters[2];
+    D3D12_TILE_REGION_SIZE region_size;
+    D3D12_CPU_DESCRIPTOR_HANDLE h_cpu;
+    D3D12_ROOT_SIGNATURE_DESC rs_desc;
+    D3D12_DESCRIPTOR_RANGE desc_range;
+    struct d3d12_resource_readback rb;
+    struct test_context context;
+    ID3D12DescriptorHeap *cpu;
+    ID3D12DescriptorHeap *gpu;
+    D3D12_HEAP_DESC heap_desc;
+    D3D12_RESOURCE_DESC desc;
+    ID3D12Resource *sparse;
+    ID3D12Resource *buffer;
+    ID3D12Heap *heap_live;
+    ID3D12Heap *heap;
+    unsigned int i;
+    HRESULT hr;
+
+    if (test_options.use_warp_device)
+    {
+        /* Mysterious references remain on the device at the end. */
+        skip("Broken on WARP.\n");
+        return;
+    }
+
+    if (!init_compute_test_context(&context))
+        return;
+
+    if (get_tiled_resources_tier(context.device) < D3D12_TILED_RESOURCES_TIER_1)
+    {
+        skip("Tiled resources not supported by device.\n");
+        destroy_test_context(&context);
+        return;
+    }
+
+    memset(&rs_desc, 0, sizeof(rs_desc));
+    memset(root_parameters, 0, sizeof(root_parameters));
+    memset(&desc_range, 0, sizeof(desc_range));
+    rs_desc.NumParameters = ARRAY_SIZE(root_parameters);
+    rs_desc.pParameters = root_parameters;
+    root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+    root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
+    root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+    root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
+    root_parameters[1].DescriptorTable.pDescriptorRanges = &desc_range;
+    desc_range.NumDescriptors = 1;
+    desc_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+    create_root_signature(context.device, &rs_desc, &context.root_signature);
+
+    memset(&heap_desc, 0, sizeof(heap_desc));
+    heap_desc.SizeInBytes = 4 * 1024 * 1024;
+    heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
+    heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+    heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
+    hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap);
+    ok(SUCCEEDED(hr), "Failed to create heap, hr %#x.\n", hr);
+    hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap_live);
+    ok(SUCCEEDED(hr), "Failed to create heap, hr %#x.\n", hr);
+
+    memset(&desc, 0, sizeof(desc));
+    desc.Width = 64 * 1024 * 1024;
+    desc.Height = 1;
+    desc.DepthOrArraySize = 1;
+    desc.SampleDesc.Count = 1;
+    desc.Format = DXGI_FORMAT_UNKNOWN;
+    desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+    desc.MipLevels = 1;
+    desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+    desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+    desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+    hr = ID3D12Device_CreateReservedResource(context.device, &desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            NULL, &IID_ID3D12Resource, (void**)&sparse);
+    ok(SUCCEEDED(hr), "Failed to create reserved resource, hr %#x.\n", hr);
+
+    {
+        const D3D12_TILED_RESOURCE_COORDINATE region_start_coordinate = { 0 };
+        const D3D12_TILE_RANGE_FLAGS range_flag = D3D12_TILE_RANGE_FLAG_NULL;
+        const UINT offset = 0;
+        const UINT count = desc.Width / (64 * 1024);
+        memset(&region_size, 0, sizeof(region_size)); /* UseBox = FALSE; no field is left uninitialised. */
+        region_size.NumTiles = desc.Width / (64 * 1024);
+        ID3D12CommandQueue_UpdateTileMappings(context.queue, sparse, 1, &region_start_coordinate, &region_size,
+                NULL, 1, &range_flag, &offset, &count, D3D12_TILE_MAPPING_FLAG_NONE);
+    }
+
+    region_size.UseBox = FALSE;
+    region_size.NumTiles = 1;
+
+    for (i = 0; i < 2; i++)
+    {
+        const D3D12_TILED_RESOURCE_COORDINATE region_start_coordinate = { i, 0, 0, 0 };
+        const D3D12_TILE_RANGE_FLAGS range_flag = D3D12_TILE_RANGE_FLAG_NONE;
+        const UINT offset = i;
+        const UINT count = 1;
+
+        ID3D12CommandQueue_UpdateTileMappings(context.queue, sparse, 1, &region_start_coordinate, &region_size,
+                i ? heap_live : heap, 1, &range_flag, &offset, &count, D3D12_TILE_MAPPING_FLAG_NONE);
+    }
+    wait_queue_idle(context.device, context.queue);
+
+    buffer = create_default_buffer(context.device, 128 * 1024,
+            D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST);
+    cpu = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
+    gpu = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2);
+
+    memset(&uav_desc, 0, sizeof(uav_desc));
+    uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
+    uav_desc.Format = DXGI_FORMAT_R32_UINT;
+    uav_desc.Buffer.NumElements = 128 * 1024 / 4;
+    uav_desc.Buffer.FirstElement = 0;
+    ID3D12Device_CreateUnorderedAccessView(context.device, sparse, NULL, &uav_desc,
+            ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu));
+    ID3D12Device_CreateUnorderedAccessView(context.device, sparse, NULL, &uav_desc,
+            ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu));
+
+    memset(&srv_desc, 0, sizeof(srv_desc));
+    srv_desc.Buffer.FirstElement = 0;
+    srv_desc.Buffer.NumElements = 2 * 1024 * 1024;
+    srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
+    srv_desc.Format = DXGI_FORMAT_R32_UINT;
+    srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+
+    h_cpu = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu);
+    h_cpu.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+    ID3D12Device_CreateShaderResourceView(context.device, sparse, &srv_desc, h_cpu);
+
+    ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu);
+    ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(context.list,
+            ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu),
+            ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu), sparse, values, 0, NULL);
+    transition_resource_state(context.list, sparse,
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
+    ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 0, sparse, 0, 128 * 1024);
+
+    transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
+    get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
+    i = get_readback_uint(&rb.rb, 0, 0, 0);
+    todo ok(i == 42, "Got %u, expected 42.\n", i);
+    i = get_readback_uint(&rb.rb, 64 * 1024 / 4, 0, 0);
+    todo ok(i == 42, "Got %u, expected 42.\n", i);
+    release_resource_readback(&rb);
+
+    reset_command_list(context.list, context.allocator);
+
+    ID3D12Heap_Release(heap);
+
+    /* Copy from tile 1, which is still backed by heap_live, now that the heap behind tile 0 is released.
+     * On native AMD Windows at least, reading the freed tile returns garbage instead of the 0 an unbound
+     * tile would give, which proves that explicitly unbinding before releasing a heap is not required. */
+    ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 0, sparse, 64 * 1024, 64 * 1024);
+
+    transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
+
+    get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
+
+    i = get_readback_uint(&rb.rb, 2048 / 4, 0, 0);
+    todo ok(i == 42, "Got %u, expected 42.\n", i);
+    i = get_readback_uint(&rb.rb, 64 * 1024 / 4, 0, 0);
+    todo ok(i == 42, "Got %u, expected 42.\n", i);
+    release_resource_readback(&rb);
+
+    ID3D12Resource_Release(buffer);
+    ID3D12Resource_Release(sparse);
+    ID3D12DescriptorHeap_Release(cpu);
+    ID3D12DescriptorHeap_Release(gpu);
+    ID3D12Heap_Release(heap_live);
+    destroy_test_context(&context);
+}
+
 START_TEST(d3d12)
 {
     parse_args(argc, argv);
@@ -39568,4 +39745,5 @@ START_TEST(d3d12)
     run_test(test_unused_interpolated_input);
     run_test(test_shader_cache);
     run_test(test_update_tile_mappings);
+    run_test(test_sparse_buffer_memory_lifetime);
 }
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d/command.c | 6 +-- libs/vkd3d/device.c | 40 ++++++++++++-- libs/vkd3d/resource.c | 107 +++++++++++++++++++++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 8 ++- tests/d3d12.c | 4 +- 5 files changed, 156 insertions(+), 9 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 7115a74a6..d7276ff2a 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -31,7 +31,7 @@ static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any);
HRESULT vkd3d_queue_create(struct d3d12_device *device, - uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) + uint32_t family_index, VkQueueFlags vk_queue_flags, uint32_t timestamp_bits, struct vkd3d_queue **queue) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_queue *object; @@ -45,8 +45,8 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, object->submitted_sequence_number = 0;
object->vk_family_index = family_index; - object->vk_queue_flags = properties->queueFlags; - object->timestamp_bits = properties->timestampValidBits; + object->vk_queue_flags = vk_queue_flags; + object->timestamp_bits = timestamp_bits;
object->semaphores = NULL; object->semaphores_size = 0; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 90de27c53..cac57f7cc 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1829,6 +1829,7 @@ enum vkd3d_queue_family VKD3D_QUEUE_FAMILY_DIRECT, VKD3D_QUEUE_FAMILY_COMPUTE, VKD3D_QUEUE_FAMILY_TRANSFER, + VKD3D_QUEUE_FAMILY_TILED_BINDING,
VKD3D_QUEUE_FAMILY_COUNT, }; @@ -1851,10 +1852,13 @@ static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) if (device->copy_queue && device->copy_queue != device->direct_queue && device->copy_queue != device->compute_queue) vkd3d_queue_destroy(device->copy_queue, device); + if (device->tiled_binding_queue && device->tiled_binding_queue != device->direct_queue) + vkd3d_queue_destroy(device->tiled_binding_queue, device);
device->direct_queue = NULL; device->compute_queue = NULL; device->copy_queue = NULL; + device->tiled_binding_queue = NULL; }
static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, @@ -1868,12 +1872,15 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, device->direct_queue = NULL; device->compute_queue = NULL; device->copy_queue = NULL; + device->tiled_binding_queue = NULL; + device->tiled_binding_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_TILED_BINDING];
device->queue_family_count = 0; memset(device->queue_family_indices, 0, sizeof(device->queue_family_indices));
if (SUCCEEDED((hr = vkd3d_queue_create(device, direct_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT], &device->direct_queue)))) + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT].queueFlags, + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT].timestampValidBits, &device->direct_queue)))) device->queue_family_indices[device->queue_family_count++] = direct_family_index; else goto out_destroy_queues; @@ -1881,7 +1888,8 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, if (compute_family_index == direct_family_index) device->compute_queue = device->direct_queue; else if (SUCCEEDED(hr = vkd3d_queue_create(device, compute_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE], &device->compute_queue))) + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE].queueFlags, + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE].timestampValidBits, &device->compute_queue))) device->queue_family_indices[device->queue_family_count++] = compute_family_index; else goto out_destroy_queues; @@ -1891,11 +1899,15 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, else if (transfer_family_index == compute_family_index) device->copy_queue = device->compute_queue; else if (SUCCEEDED(hr = vkd3d_queue_create(device, transfer_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER], &device->copy_queue))) + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER].queueFlags, + queue_info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER].timestampValidBits, &device->copy_queue))) device->queue_family_indices[device->queue_family_count++] = transfer_family_index; else goto out_destroy_queues;
+ if (device->tiled_binding_family_index == direct_family_index) + device->tiled_binding_queue = device->direct_queue; + device->feature_options3.CopyQueueTimestampQueriesSupported = !!device->copy_queue->timestamp_bits;
return S_OK; @@ -1905,6 +1917,17 @@ out_destroy_queues: return hr; }
+struct vkd3d_queue *d3d12_device_get_tiled_binding_queue(struct d3d12_device *device) +{ + HRESULT hr; + + if (!device->tiled_binding_queue + && (SUCCEEDED(hr = vkd3d_queue_create(device, device->tiled_binding_family_index, + VK_QUEUE_SPARSE_BINDING_BIT, 0, &device->tiled_binding_queue)))) + device->queue_family_indices[device->queue_family_count++] = device->tiled_binding_family_index; + return device->tiled_binding_queue; +} + static float queue_priorities[] = {1.0f};
static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, @@ -1944,6 +1967,12 @@ static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, vkd3d_family = VKD3D_QUEUE_FAMILY_TRANSFER; }
+ if (info->family_index[VKD3D_QUEUE_FAMILY_TILED_BINDING] == UINT_MAX + && (queue_properties[i].queueFlags & VK_QUEUE_SPARSE_BINDING_BIT)) + { + info->family_index[VKD3D_QUEUE_FAMILY_TILED_BINDING] = i; + } + if (vkd3d_family == VKD3D_QUEUE_FAMILY_COUNT) continue;
@@ -1978,6 +2007,11 @@ static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, info->family_index[VKD3D_QUEUE_FAMILY_TRANSFER] = info->family_index[VKD3D_QUEUE_FAMILY_DIRECT]; info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER] = info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT]; } + if (info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT].queueFlags & VK_QUEUE_SPARSE_BINDING_BIT) + { + info->family_index[VKD3D_QUEUE_FAMILY_TILED_BINDING] = info->family_index[VKD3D_QUEUE_FAMILY_DIRECT]; + info->vk_properties[VKD3D_QUEUE_FAMILY_TILED_BINDING] = info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT]; + }
/* Compact the array. */ info->vk_family_count = 1; diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 179999148..3940e46bc 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -671,6 +671,7 @@ HRESULT vkd3d_create_buffer(struct d3d12_device *device, buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT; if (device->vk_info.sparse_properties.residencyNonResidentStrict) buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; + d3d12_device_get_tiled_binding_queue(device); }
buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT @@ -861,6 +862,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, }
image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + d3d12_device_get_tiled_binding_queue(device); } else if (desc->Layout == D3D12_TEXTURE_LAYOUT_UNKNOWN) { @@ -977,6 +979,15 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device,
static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) { + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + + if (!resource->tiles.subresources) + return; + + VK_CALL(vkFreeMemory(resource->device->vk_device, resource->tiles.mip_tail_memory, NULL)); + + vkd3d_free(resource->tiles.bind_buffer); + vkd3d_free(resource->tiles.subresources); }
@@ -1141,6 +1152,72 @@ void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_r
     *subresource_tiling_count = i;
 }

+/* Pre-bind device memory to the packed mip tail of a sparse image. One opaque bind is recorded
+ * per layer, or a single bind when all layers share one mip tail. Does nothing if the image has
+ * no packed mips. Failures are logged and otherwise ignored. */
+static void d3d12_resource_bind_sparse_mip_tail(struct d3d12_resource *resource,
+        VkSparseImageMemoryRequirements *sparse_requirements)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs;
+    VkSparseMemoryBind *memory_binds = resource->tiles.bind_buffer;
+    VkSparseImageOpaqueMemoryBindInfo opaque_bind_info;
+    struct d3d12_device *device = resource->device;
+    struct vkd3d_queue *vkd3d_queue;
+    VkBindSparseInfo sparse_info;
+    unsigned int i, layer_count;
+    VkDeviceSize memory_offset;
+    VkQueue vk_queue;
+    VkResult vr;
+
+    if (!resource->tiles.packed_mip_tile_count)
+        return;
+
+    if (!(vkd3d_queue = d3d12_device_get_tiled_binding_queue(device)))
+    {
+        ERR("Failed to get sparse binding queue.\n");
+        return;
+    }
+
+    layer_count = resource->tiles.single_mip_tail ? 1 : d3d12_resource_desc_get_layer_count(&resource->desc);
+
+    /* Each layer's tail gets its own VkSparseMemoryBind and its own slice of mip_tail_memory.
+     * NOTE(review): assumes bind_buffer has room for layer_count entries; it is sized from the
+     * total tile count in d3d12_resource_init_tiles() - confirm. */
+    for (i = 0, memory_offset = 0; i < layer_count; ++i)
+    {
+        memory_binds[i].resourceOffset = sparse_requirements->imageMipTailOffset
+                + i * sparse_requirements->imageMipTailStride;
+        memory_binds[i].size = sparse_requirements->imageMipTailSize;
+        memory_binds[i].memory = resource->tiles.mip_tail_memory;
+        memory_binds[i].memoryOffset = memory_offset;
+        memory_binds[i].flags = 0;
+        memory_offset += memory_binds[i].size;
+    }
+
+    opaque_bind_info.image = resource->u.vk_image;
+    opaque_bind_info.bindCount = layer_count;
+    opaque_bind_info.pBinds = memory_binds;
+
+    memset(&sparse_info, 0, sizeof(sparse_info));
+    sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+    sparse_info.imageOpaqueBindCount = 1;
+    sparse_info.pImageOpaqueBinds = &opaque_bind_info;
+
+    if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue)))
+    {
+        ERR("Failed to acquire queue %p.\n", vkd3d_queue);
+        return;
+    }
+
+    if ((vr = VK_CALL(vkQueueBindSparse(vk_queue, 1, &sparse_info, VK_NULL_HANDLE))) < 0)
+        ERR("Failed to submit sparse image bind, vr %d.\n", vr);
+    /* The caller may use the resource in another queue. Avoid sync complications by waiting for idle. */
+    if ((vr = VK_CALL(vkQueueWaitIdle(vk_queue))) < 0)
+        WARN("Failed to wait for queue, vr %d.\n", vr);
+
+    vkd3d_queue_release(vkd3d_queue);
+}
+
 static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device)
 {
     unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx;
@@ -1148,9 +1219,11 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3
     VkSparseImageMemoryRequirements *sparse_requirements_array;
     VkSparseImageMemoryRequirements sparse_requirements = {0};
     struct vkd3d_subresource_tile_info *tile_info;
+    D3D12_HEAP_PROPERTIES heap_properties;
     VkMemoryRequirements requirements;
     const VkExtent3D *tile_extent;
     uint32_t requirement_count;
+    HRESULT hr;
subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc);
@@ -1214,6 +1287,8 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 sparse_requirements = sparse_requirements_array[i]; } } + if (sparse_requirements_array[i].formatProperties.aspectMask & VK_IMAGE_ASPECT_METADATA_BIT) + FIXME("Mip tail metadata binding is not implemented.\n"); } vkd3d_free(sparse_requirements_array); if (!sparse_requirements.formatProperties.aspectMask) @@ -1223,6 +1298,8 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 }
resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; + resource->tiles.single_mip_tail = !!(sparse_requirements.formatProperties.flags + & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT); resource->tiles.subresource_count = subresource_count; resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; @@ -1252,9 +1329,39 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 } } resource->tiles.total_count = start_idx; + + if (resource->tiles.packed_mip_tile_count) + { + memset(&heap_properties, 0, sizeof(heap_properties)); + heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; + requirements.size = sparse_requirements.imageMipTailSize; + if (!resource->tiles.single_mip_tail) + requirements.size *= d3d12_resource_desc_get_layer_count(&resource->desc); + if (FAILED(hr = vkd3d_allocate_device_memory(device, &heap_properties, 0, &requirements, NULL, + &resource->tiles.mip_tail_memory, NULL))) + { + ERR("Failed to allocate device memory for mip tail, hr %s.\n", debugstr_hresult(hr)); + goto error; + } + } + + if (!(resource->tiles.bind_buffer = vkd3d_malloc(start_idx * max(sizeof(VkSparseImageMemoryBind), + sizeof(VkBufferImageCopy))))) + { + ERR("Failed to allocate binding buffer.\n"); + goto error; + } + + /* Vulkan implementations may merge layer miptails into a single miptail, which is not supported in D3D12. + * TODO: do this only if single miptails are used, otherwise handle miptails in UpdateTileMappings(). */ + d3d12_resource_bind_sparse_mip_tail(resource, &sparse_requirements); }
return true; + +error: + d3d12_resource_tile_info_cleanup(resource); + return false; }
/* ID3D12Resource */ diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 8de57a336..140aa3575 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -620,7 +620,10 @@ struct d3d12_resource_tile_info unsigned int standard_mip_count; unsigned int packed_mip_tile_count; unsigned int subresource_count; + bool single_mip_tail; struct vkd3d_subresource_tile_info *subresources; + VkDeviceMemory mip_tail_memory; + void *bind_buffer; };
/* ID3D12Resource */ @@ -1465,7 +1468,7 @@ struct vkd3d_queue
VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, - const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); + VkQueueFlags vk_queue_flags, uint32_t timestamp_bits, struct vkd3d_queue **queue); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue);
@@ -1715,9 +1718,11 @@ struct d3d12_device struct vkd3d_queue *direct_queue; struct vkd3d_queue *compute_queue; struct vkd3d_queue *copy_queue; + struct vkd3d_queue *tiled_binding_queue; uint32_t queue_family_indices[VKD3D_MAX_QUEUE_FAMILY_COUNT]; unsigned int queue_family_count; VkTimeDomainEXT vk_host_time_domain; + unsigned int tiled_binding_family_index;
struct vkd3d_mutex blocked_queues_mutex; struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES]; @@ -1742,6 +1747,7 @@ struct d3d12_device HRESULT d3d12_device_create(struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info, struct d3d12_device **device); struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type); +struct vkd3d_queue *d3d12_device_get_tiled_binding_queue(struct d3d12_device *device); bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); diff --git a/tests/d3d12.c b/tests/d3d12.c index fc2e176c3..48785e6d7 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -39113,7 +39113,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < j; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo check_readback_data_uint(&rb.rb, &box, i + 1, 0); + todo_if(i < packed_mip_info.StartTileIndexInOverallResource) check_readback_data_uint(&rb.rb, &box, i + 1, 0); }
release_resource_readback(&rb); @@ -39209,7 +39209,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < j; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo_if(texture_region_tiles[i]) + todo_if(i < packed_mip_info.StartTileIndexInOverallResource && texture_region_tiles[i]) check_readback_data_uint(&rb.rb, &box, texture_region_tiles[i], 0); }
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d/command.c | 375 ++++++++++++++++++++++++++++++++++++- libs/vkd3d/device.c | 21 +++ libs/vkd3d/resource.c | 6 + libs/vkd3d/vkd3d_private.h | 1 + tests/d3d12.c | 13 +- 5 files changed, 408 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index d7276ff2a..1e3c58d7c 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -104,6 +104,27 @@ void vkd3d_queue_release(struct vkd3d_queue *queue) vkd3d_mutex_unlock(&queue->mutex); }
+static VkResult vkd3d_queue_submit_wait_acquired(const struct vkd3d_queue *queue, VkSemaphore vk_semaphore, + struct d3d12_device *device) +{ + VkPipelineStageFlags stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSubmitInfo submit_info; + + memset(&submit_info, 0, sizeof(submit_info)); + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = NULL; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &vk_semaphore; + submit_info.pWaitDstStageMask = &stage_mask; + submit_info.commandBufferCount = 0; + submit_info.pCommandBuffers = 0; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = NULL; + + return VK_CALL(vkQueueSubmit(queue->vk_queue, 1, &submit_info, VK_NULL_HANDLE)); +} + static VkResult vkd3d_queue_wait_idle(struct vkd3d_queue *queue, const struct vkd3d_vk_device_procs *vk_procs) { @@ -3946,6 +3967,104 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm } }
+struct vkd3d_resource_tile_coordinate +{ + unsigned int x; + unsigned int y; + unsigned int z; +}; + +static inline unsigned int d3d12_tile_region_size_compute_tile_count(const D3D12_TILE_REGION_SIZE *region_size) +{ + return region_size->Width * region_size->Height * region_size->Depth; +} + +static inline void d3d12_tile_region_size_set_entire_subresource(D3D12_TILE_REGION_SIZE *region_size, + const struct d3d12_resource *resource, unsigned int subresource) +{ + const struct vkd3d_tiled_region_extent *extent = &resource->tiles.subresources[subresource].extent; + region_size->Width = extent->width; + region_size->Height = extent->height; + region_size->Depth = extent->depth; +} + +static bool resource_validate_tiled_coordinate(const struct d3d12_resource *resource, + const D3D12_TILED_RESOURCE_COORDINATE *coordinate) +{ + const struct vkd3d_tiled_region_extent *extent = &resource->tiles.subresources[coordinate->Subresource].extent; + + return coordinate->Subresource < resource->tiles.subresource_count + && coordinate->X < extent->width && coordinate->Y < extent->height && coordinate->Z < extent->depth; +} + +/* coordinate must already be validated */ +static bool resource_validate_tile_region_size(const struct d3d12_resource *resource, + const D3D12_TILED_RESOURCE_COORDINATE *coordinate, const D3D12_TILE_REGION_SIZE *size) +{ + const struct vkd3d_tiled_region_extent *extent = &resource->tiles.subresources[coordinate->Subresource].extent; + D3D12_TILE_REGION_SIZE max_size; + + if (!size || !size->UseBox) + return true; + + max_size.Width = extent->width - coordinate->X; + max_size.Height = extent->height - coordinate->Y; + max_size.Depth = extent->depth - coordinate->Z; + return size->Width <= max_size.Width && size->Height <= max_size.Height && size->Depth <= max_size.Depth; +} + +/* Initialises a region in base_coordinate and region_size, where base_coordinate is always the front + * top left. 
If src_region_size->UseBox is true, start_coordinate is also the front top left, otherwise + * it can start anywhere within the region and the region front top left is always {0, 0, 0}. */ +static bool vkd3d_initialise_tile_region(struct vkd3d_resource_tile_coordinate *base_coordinate, + D3D12_TILE_REGION_SIZE *region_size, const D3D12_TILED_RESOURCE_COORDINATE *start_coordinate, + const D3D12_TILE_REGION_SIZE *src_region_size, const struct d3d12_resource *resource) +{ + unsigned int count; + + if (!resource_validate_tiled_coordinate(resource, start_coordinate)) + { + WARN("Invalid start coordinate (%u: %u, %u, %u).\n", start_coordinate->Subresource, start_coordinate->X, + start_coordinate->Y, start_coordinate->Z); + return false; + } + if (!resource_validate_tile_region_size(resource, start_coordinate, src_region_size)) + { + WARN("Invalid region size (%u, %u, %u).\n", src_region_size->Width, src_region_size->Height, + src_region_size->Depth); + return false; + } + + if (src_region_size) + { + *region_size = *src_region_size; + } + else + { + region_size->UseBox = false; + region_size->NumTiles = 1; + } + + if (region_size->UseBox) + { + base_coordinate->x = start_coordinate->X; + base_coordinate->y = start_coordinate->Y; + base_coordinate->z = start_coordinate->Z; + /* NumTiles should be set by the caller. Validate it. 
*/ + count = d3d12_tile_region_size_compute_tile_count(region_size); + if (region_size->NumTiles != count) + WARN("NumTiles does not match the box size.\n"); + region_size->NumTiles = count; + } + else + { + memset(base_coordinate, 0, sizeof(*base_coordinate)); + d3d12_tile_region_size_set_entire_subresource(region_size, resource, start_coordinate->Subresource); + } + + return true; +} + static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList5 *iface, ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, @@ -6237,6 +6356,8 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if return refcount; }
+static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings); + static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) { switch (op->opcode) @@ -6254,6 +6375,9 @@ static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) break;
case VKD3D_CS_OP_UPDATE_MAPPINGS: + update_mappings_cleanup(&op->u.update_mappings); + break; + case VKD3D_CS_OP_COPY_MAPPINGS: break; } @@ -6490,6 +6614,249 @@ free_clones: update_mappings_cleanup(&update_mappings); }
+static void deaggregate_sparse_memory_bind(VkSparseBufferMemoryBindInfo *buffer_bind_info, + const VkSparseMemoryBind *src, unsigned int tile_count, struct d3d12_resource *resource) +{ + VkSparseMemoryBind *memory_binds = (VkSparseMemoryBind *)buffer_bind_info->pBinds + buffer_bind_info->bindCount; + unsigned int i; + + for (i = 0; i < tile_count; ++i) + { + memory_binds[i].resourceOffset = src->resourceOffset + i * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + memory_binds[i].size = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + memory_binds[i].memory = src->memory; + memory_binds[i].memoryOffset = src->memoryOffset + i * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + memory_binds[i].flags = src->flags; + } + + buffer_bind_info->bindCount += tile_count; +} + +static unsigned int vkd3d_queue_bind_sparse_block(VkBindSparseInfo *sparse_info, + struct d3d12_resource *resource, const struct vkd3d_resource_tile_coordinate *base_coordinate, + D3D12_TILED_RESOURCE_COORDINATE *coordinate, const D3D12_TILE_REGION_SIZE *region_size, + VkDeviceMemory vk_memory, unsigned int memory_offset, unsigned int memory_tile_count, bool skip_binding) +{ + unsigned int subresource = coordinate->Subresource; + VkSparseMemoryBind memory_bind; + unsigned int tiles_used; + + /* The tiled resource spec for D3D11 seems to apply to D3D12 also, and states: + * "For mipmaps that use nonstandard tiling and/or are packed, any subresource + * value that indicates any of the packed mips all refer to the same tile." */ + if (subresource % resource->desc.MipLevels >= resource->tiles.standard_mip_count) + { + /* Already bound, but the caller expects this to use the required number of tiles, + * which is 1 because we bind the mip tails on resource creation and return a + * dummy value of 1. 
*/ + return 1; + } + + if (d3d12_resource_is_buffer(resource)) + { + tiles_used = region_size->NumTiles; + tiles_used = min(tiles_used, memory_tile_count); + + memory_bind.resourceOffset = coordinate->X * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + coordinate->X += tiles_used; + + if (skip_binding || !tiles_used) + return tiles_used; + + memory_bind.size = tiles_used * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + memory_bind.memory = vk_memory; + memory_bind.memoryOffset = memory_offset * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + memory_bind.flags = 0; + + /* To workaround a bug in NVIDIA drivers (older ones at least) requires one tile per struct. This + * could be skipped on other hardware by checking physical_device_info->properties2.properties.vendorID. */ + deaggregate_sparse_memory_bind((VkSparseBufferMemoryBindInfo *)sparse_info->pBufferBinds, + &memory_bind, tiles_used, resource); + } + else + { + vkd3d_unreachable(); + } + + return tiles_used; +} + +static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue *command_queue, + struct d3d12_resource *resource, UINT region_count, + const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, + const D3D12_TILE_REGION_SIZE *region_sizes, + struct d3d12_heap *heap, + UINT range_count, + const D3D12_TILE_RANGE_FLAGS *range_flags, + const UINT *heap_range_offsets, + const UINT *range_tile_counts, + D3D12_TILE_MAPPING_FLAGS flags) +{ + const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; + bool null_binding, aliased_binding, skip_binding, have_unsupported_aliasing; + VkDeviceMemory vk_memory = heap ? 
heap->vk_memory : VK_NULL_HANDLE; + unsigned int memory_offset, memory_tile_count, tiles_used; + struct vkd3d_resource_tile_coordinate base_coordinate; + struct d3d12_device *device = command_queue->device; + D3D12_TILED_RESOURCE_COORDINATE coordinate_zero; + VkSparseBufferMemoryBindInfo buffer_bind_info; + D3D12_TILE_REGION_SIZE region_size_default; + D3D12_TILED_RESOURCE_COORDINATE coordinate; + D3D12_TILE_REGION_SIZE region_size; + unsigned int region_idx, range_idx; + D3D12_TILE_RANGE_FLAGS cur_flags; + struct vkd3d_queue *vkd3d_queue; + VkBindSparseInfo sparse_info; + unsigned int tile_count_all; + VkResult vr; + + if (d3d12_resource_is_texture(resource)) + { + FIXME("Tiled textures are not implemented yet.\n"); + return; + } + + if (region_count == 1) + { + if (!region_sizes) + { + region_size_default.UseBox = false; + region_size_default.NumTiles = region_start_coordinates ? 1 : resource->tiles.total_count; + region_sizes = &region_size_default; + } + if (!region_start_coordinates) + { + memset(&coordinate_zero, 0, sizeof(coordinate_zero)); + region_start_coordinates = &coordinate_zero; + } + } + + if (range_count == 1 && !range_tile_counts) + { + tile_count_all = resource->tiles.total_count; + range_tile_counts = &tile_count_all; + } + + if (flags) + WARN("Ignoring flags %#x.\n", flags); + + memory_offset = heap_range_offsets ? 
heap_range_offsets[0] : 0; + memory_tile_count = range_tile_counts[0]; + coordinate = region_start_coordinates[0]; + + if (!vkd3d_initialise_tile_region(&base_coordinate, &region_size, &coordinate, &region_sizes[0], resource)) + return; + + region_idx = 0; + range_idx = 0; + null_binding = false; + aliased_binding = false; + skip_binding = false; + have_unsupported_aliasing = false; + + vkd3d_queue = command_queue->vkd3d_queue; + if (!(vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT) + && !(vkd3d_queue = d3d12_device_get_tiled_binding_queue(device))) + { + ERR("Failed to get sparse binding queue.\n"); + return; + } + + if (heap) + vkd3d_mutex_lock(&heap->mutex); + + buffer_bind_info.buffer = resource->u.vk_buffer; + buffer_bind_info.bindCount = 0; + buffer_bind_info.pBinds = resource->tiles.bind_buffer; + memset(&sparse_info, 0, sizeof(sparse_info)); + sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; + sparse_info.bufferBindCount = 1; + sparse_info.pBufferBinds = &buffer_bind_info; + + do + { + if (range_flags) + { + cur_flags = range_flags[range_idx]; + null_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_NULL); + skip_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_SKIP); + aliased_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE); + if (aliased_binding && !(null_binding || skip_binding)) + { + have_unsupported_aliasing = true; + skip_binding = true; + } + if ((cur_flags &= ~(D3D12_TILE_RANGE_FLAG_NULL | D3D12_TILE_RANGE_FLAG_SKIP | D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE))) + FIXME("Ignoring flags %#x.\n", cur_flags); + } + + if (!heap_range_offsets && !null_binding) + { + WARN("Heap range offets may be NULL only if D3D12_TILE_RANGE_FLAG_NULL is used.\n"); + break; + } + + tiles_used = vkd3d_queue_bind_sparse_block(&sparse_info, resource, &base_coordinate, + &coordinate, &region_size, null_binding ? VK_NULL_HANDLE : vk_memory, memory_offset, + aliased_binding ? 
1 : memory_tile_count, skip_binding); + + if (!aliased_binding) + memory_offset += tiles_used; + memory_tile_count -= tiles_used; + region_size.NumTiles -= tiles_used; + + if (!memory_tile_count && ++range_idx < range_count) + { + memory_offset = heap_range_offsets ? heap_range_offsets[range_idx] : 0; + memory_tile_count = range_tile_counts[range_idx]; + } + + if (!region_size.NumTiles && ++region_idx < region_count) + { + coordinate = region_start_coordinates[region_idx]; + if (!vkd3d_initialise_tile_region(&base_coordinate, &region_size, &coordinate, + region_sizes ? &region_sizes[region_idx] : NULL, resource)) + break; + } + } + while (region_idx < region_count && range_idx < range_count); + + if (heap) + vkd3d_mutex_unlock(&heap->mutex); + + if (!buffer_bind_info.bindCount) + return; + + if (have_unsupported_aliasing) + FIXME("Aliased bindings are not implemented.\n"); + + if (!vkd3d_queue_acquire(vkd3d_queue)) + { + ERR("Failed to acquire queue %p.\n", vkd3d_queue); + return; + } + + sparse_info.pSignalSemaphores = &device->tiled_binding_semaphore; + sparse_info.signalSemaphoreCount = 1; + if ((vr = VK_CALL(vkQueueBindSparse(vkd3d_queue->vk_queue, 1, &sparse_info, VK_NULL_HANDLE))) < 0) + ERR("Failed to submit sparse image bind, vr %d.\n", vr); + + if (vkd3d_queue != command_queue->vkd3d_queue) + { + vkd3d_queue_release(vkd3d_queue); + if (!vkd3d_queue_acquire(vkd3d_queue = command_queue->vkd3d_queue)) + { + ERR("Failed to acquire queue %p.\n", vkd3d_queue); + return; + } + } + + if ((vr = vkd3d_queue_submit_wait_acquired(vkd3d_queue, device->tiled_binding_semaphore, device)) < 0) + ERR("Failed to submit queue wait, vr %d.\n", vr); + + vkd3d_queue_release(vkd3d_queue); +} + static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *dst_resource, const D3D12_TILED_RESOURCE_COORDINATE *dst_region_start_coordinate, @@ -7274,8 +7641,12 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * 
break;
case VKD3D_CS_OP_UPDATE_MAPPINGS: - FIXME("Tiled resource binding is not supported yet.\n"); - update_mappings_cleanup(&op->u.update_mappings); + d3d12_command_queue_update_tile_mappings(queue, op->u.update_mappings.resource, + op->u.update_mappings.region_count, op->u.update_mappings.region_start_coordinates, + op->u.update_mappings.region_sizes, op->u.update_mappings.heap, + op->u.update_mappings.range_count, op->u.update_mappings.range_flags, + op->u.update_mappings.heap_range_offsets, op->u.update_mappings.range_tile_counts, + op->u.update_mappings.flags); break;
case VKD3D_CS_OP_COPY_MAPPINGS: diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index cac57f7cc..9e9047417 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1845,6 +1845,8 @@ struct vkd3d_device_queue_info
static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + if (device->direct_queue) vkd3d_queue_destroy(device->direct_queue, device); if (device->compute_queue && device->compute_queue != device->direct_queue) @@ -1855,6 +1857,8 @@ static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) if (device->tiled_binding_queue && device->tiled_binding_queue != device->direct_queue) vkd3d_queue_destroy(device->tiled_binding_queue, device);
+ VK_CALL(vkDestroySemaphore(device->vk_device, device->tiled_binding_semaphore, NULL)); + device->direct_queue = NULL; device->compute_queue = NULL; device->copy_queue = NULL; @@ -1867,6 +1871,8 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, uint32_t transfer_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_TRANSFER]; uint32_t compute_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_COMPUTE]; uint32_t direct_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_DIRECT]; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkResult vr; HRESULT hr;
device->direct_queue = NULL; @@ -1910,6 +1916,21 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device,
device->feature_options3.CopyQueueTimestampQueriesSupported = !!device->copy_queue->timestamp_bits;
+ device->tiled_binding_semaphore = VK_NULL_HANDLE; + if (device->feature_options.TiledResourcesTier >= D3D12_TILED_RESOURCES_TIER_1) + { + VkSemaphoreCreateInfo semaphore_info; + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_info.pNext = NULL; + semaphore_info.flags = 0; + if ((vr = VK_CALL(vkCreateSemaphore(device->vk_device, &semaphore_info, NULL, + &device->tiled_binding_semaphore))) < 0) + { + ERR("Failed to create tiled binding semaphore, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + } + return S_OK;
out_destroy_queues: diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 3940e46bc..d88a4eafb 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -1256,6 +1256,12 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 resource->tiles.subresource_count = 1; resource->tiles.standard_mip_count = 1; resource->tiles.packed_mip_tile_count = 0; + + if (!(resource->tiles.bind_buffer = vkd3d_malloc(resource->tiles.total_count * sizeof(VkSparseMemoryBind)))) + { + ERR("Failed to allocate binding buffer.\n"); + goto error; + } } else { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 140aa3575..0e3ea1411 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1723,6 +1723,7 @@ struct d3d12_device unsigned int queue_family_count; VkTimeDomainEXT vk_host_time_domain; unsigned int tiled_binding_family_index; + VkSemaphore tiled_binding_semaphore;
struct vkd3d_mutex blocked_queues_mutex; struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES]; diff --git a/tests/d3d12.c b/tests/d3d12.c index 48785e6d7..ee0bc1006 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -38931,7 +38931,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < 64; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo check_readback_data_uint(&rb.rb, &box, i + 1, 0); + check_readback_data_uint(&rb.rb, &box, i + 1, 0); }
release_resource_readback(&rb); @@ -39014,7 +39014,8 @@ static void test_update_tile_mappings(void) for (i = 0; i < ARRAY_SIZE(buffer_region_tiles); i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo_if(buffer_region_tiles[i]) check_readback_data_uint(&rb.rb, &box, buffer_region_tiles[i], 0); + todo_if((i >= region_offsets[0].X && i < region_offsets[0].X + region_sizes[0].NumTiles) || (i >= 24 && i <= 26)) + check_readback_data_uint(&rb.rb, &box, buffer_region_tiles[i], 0); }
release_resource_readback(&rb); @@ -39527,9 +39528,9 @@ static void test_sparse_buffer_memory_lifetime(void) transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE); get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list); i = get_readback_uint(&rb.rb, 0, 0, 0); - todo ok(i == 42, "Got #%x, expected 42.\n", i); + ok(i == 42, "Got #%x, expected 42.\n", i); i = get_readback_uint(&rb.rb, 64 * 1024 / 4, 0, 0); - todo ok(i == 42, "Got #%x, expected 42.\n", i); + ok(i == 42, "Got #%x, expected 42.\n", i); release_resource_readback(&rb);
reset_command_list(context.list, context.allocator); @@ -39546,9 +39547,9 @@ static void test_sparse_buffer_memory_lifetime(void) get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
i = get_readback_uint(&rb.rb, 2048 / 4, 0, 0); - todo ok(i == 42, "Got #%x, expected 42.\n", i); + ok(i == 42, "Got #%x, expected 42.\n", i); i = get_readback_uint(&rb.rb, 64 * 1024 / 4, 0, 0); - todo ok(i == 42, "Got #%x, expected 42.\n", i); + ok(i == 42, "Got #%x, expected 42.\n", i); release_resource_readback(&rb);
ID3D12Resource_Release(buffer);
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d/command.c | 186 ++++++++++++++++++++++++++++++++++++++++--- tests/d3d12.c | 4 +- 2 files changed, 177 insertions(+), 13 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 1e3c58d7c..faa0c891b 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -4065,6 +4065,47 @@ static bool vkd3d_initialise_tile_region(struct vkd3d_resource_tile_coordinate * return true; }
+static bool d3d12_tiled_resource_coordinate_normalise(const struct vkd3d_resource_tile_coordinate *base_coordinate, + const D3D12_TILE_REGION_SIZE *region_extent, D3D12_TILED_RESOURCE_COORDINATE *coordinate) +{ + unsigned int carry; + + /* This should compile branchless on most hardware. */ + carry = coordinate->X >= base_coordinate->x + region_extent->Width; + coordinate->Y += carry; + coordinate->X -= region_extent->Width & -carry; + + carry = coordinate->Y >= base_coordinate->y + region_extent->Height; + coordinate->Z += carry; + coordinate->Y -= region_extent->Height & -carry; + + carry = coordinate->Z >= base_coordinate->z + region_extent->Depth; + coordinate->Subresource += carry; + coordinate->Z -= region_extent->Depth & -carry; + + return carry; +} + +static void vk_offset_convert_tiles_to_texels(VkOffset3D *offset, const VkExtent3D *tile_extent) +{ + offset->x *= tile_extent->width; + offset->y *= tile_extent->height; + offset->z *= tile_extent->depth; +} + +static void d3d12_resource_get_vk_subresource(const struct d3d12_resource *resource, unsigned int subresource, + VkImageSubresource *vk_subresource) +{ + const struct vkd3d_format *format = resource->format; + const D3D12_RESOURCE_DESC1 *desc = &resource->desc; + + assert(format->plane_count == 1); + + vk_subresource->mipLevel = subresource % desc->MipLevels; + vk_subresource->arrayLayer = subresource / desc->MipLevels; + vk_subresource->aspectMask = format->vk_aspect_mask; +} + static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList5 *iface, ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, @@ -6614,6 +6655,61 @@ free_clones: update_mappings_cleanup(&update_mappings); }
+static unsigned int vkd3d_set_sparse_image_bind_region(VkSparseImageMemoryBind *memory_bind, + const struct vkd3d_resource_tile_coordinate *base_coordinate, + D3D12_TILED_RESOURCE_COORDINATE *coordinate, + const D3D12_TILE_REGION_SIZE *region_extent, const struct d3d12_resource *resource, + unsigned int memory_tile_count) +{ + unsigned int height, depth, remaining, max_tile_count, tile_count, layer_stride; + VkOffset3D *offset = &memory_bind->offset; + VkExtent3D *extent = &memory_bind->extent; + bool partial_x, partial_y; + + max_tile_count = min(region_extent->NumTiles, memory_tile_count); + tile_count = max_tile_count; + partial_x = coordinate->X > base_coordinate->x; + partial_y = coordinate->Y > base_coordinate->y; + + offset->x = coordinate->X; + offset->y = coordinate->Y; + offset->z = coordinate->Z; + + /* Grab the largest possible width */ + remaining = region_extent->Width - (coordinate->X - base_coordinate->x); + extent->width = min(remaining, tile_count); + coordinate->X += extent->width; + tile_count -= extent->width; + extent->height = 1; + extent->depth = 1; + if (d3d12_tiled_resource_coordinate_normalise(base_coordinate, region_extent, coordinate) + || partial_x || !tile_count || !(height = tile_count / region_extent->Width)) + goto done; + + /* Expand the height */ + remaining = region_extent->Height - (coordinate->Y - base_coordinate->y); + remaining = min(remaining, height); + extent->height += remaining; + coordinate->Y += remaining; + tile_count -= region_extent->Width * remaining; + if (d3d12_tiled_resource_coordinate_normalise(base_coordinate, region_extent, coordinate) + || partial_y || !tile_count + || !(depth = tile_count / (layer_stride = region_extent->Width * region_extent->Height))) + goto done; + + /* Expand the depth */ + remaining = region_extent->Depth - (coordinate->Z - base_coordinate->z); + remaining = min(remaining, depth); + extent->depth += remaining; + coordinate->Z += remaining; + tile_count -= layer_stride * remaining; + 
+ d3d12_tiled_resource_coordinate_normalise(base_coordinate, region_extent, coordinate); + +done: + return max_tile_count - tile_count; +} + static void deaggregate_sparse_memory_bind(VkSparseBufferMemoryBindInfo *buffer_bind_info, const VkSparseMemoryBind *src, unsigned int tile_count, struct d3d12_resource *resource) { @@ -6632,12 +6728,47 @@ static void deaggregate_sparse_memory_bind(VkSparseBufferMemoryBindInfo *buffer_ buffer_bind_info->bindCount += tile_count; }
+static void deaggregate_sparse_image_memory_bind(VkSparseImageMemoryBindInfo *image_bind_info, + const VkSparseImageMemoryBind *src, struct d3d12_resource *resource) +{ + const VkExtent3D *tile_extent = &resource->tiles.tile_extent; + VkSparseImageMemoryBind *image_memory_binds; + unsigned int i, x, y, z, tile_count; + + image_memory_binds = (VkSparseImageMemoryBind *)image_bind_info->pBinds + image_bind_info->bindCount; + tile_count = src->extent.width * src->extent.height * src->extent.depth; + + for (z = 0, i = 0; z < src->extent.depth; ++z) + { + for (y = 0; y < src->extent.height; ++y) + { + for (x = 0; x < src->extent.width; ++x, ++i) + { + image_memory_binds[i].subresource = src->subresource; + image_memory_binds[i].offset.x = src->offset.x + x; + image_memory_binds[i].offset.y = src->offset.y + y; + image_memory_binds[i].offset.z = src->offset.z + z; + vk_offset_convert_tiles_to_texels(&image_memory_binds[i].offset, tile_extent); + image_memory_binds[i].extent.width = tile_extent->width; + image_memory_binds[i].extent.height = tile_extent->height; + image_memory_binds[i].extent.depth = tile_extent->depth; + image_memory_binds[i].memory = src->memory; + image_memory_binds[i].memoryOffset = src->memoryOffset + i * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + image_memory_binds[i].flags = src->flags; + } + } + } + + image_bind_info->bindCount += tile_count; +} + static unsigned int vkd3d_queue_bind_sparse_block(VkBindSparseInfo *sparse_info, struct d3d12_resource *resource, const struct vkd3d_resource_tile_coordinate *base_coordinate, D3D12_TILED_RESOURCE_COORDINATE *coordinate, const D3D12_TILE_REGION_SIZE *region_size, VkDeviceMemory vk_memory, unsigned int memory_offset, unsigned int memory_tile_count, bool skip_binding) { unsigned int subresource = coordinate->Subresource; + VkSparseImageMemoryBind image_memory_bind; VkSparseMemoryBind memory_bind; unsigned int tiles_used;
@@ -6675,7 +6806,21 @@ static unsigned int vkd3d_queue_bind_sparse_block(VkBindSparseInfo *sparse_info, } else { - vkd3d_unreachable(); + d3d12_resource_get_vk_subresource(resource, subresource, &image_memory_bind.subresource); + + tiles_used = vkd3d_set_sparse_image_bind_region(&image_memory_bind, + base_coordinate, coordinate, region_size, resource, memory_tile_count); + + if (skip_binding || !tiles_used) + return tiles_used; + + image_memory_bind.memory = vk_memory; + image_memory_bind.memoryOffset = memory_offset * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + image_memory_bind.flags = 0; + + /* NVIDIA bug (see above).*/ + deaggregate_sparse_image_memory_bind((VkSparseImageMemoryBindInfo *)sparse_info->pImageBinds, + &image_memory_bind, resource); }
return tiles_used; @@ -6694,12 +6839,13 @@ static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue { const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; bool null_binding, aliased_binding, skip_binding, have_unsupported_aliasing; + unsigned int memory_offset, memory_tile_count, tiles_used, subresource; VkDeviceMemory vk_memory = heap ? heap->vk_memory : VK_NULL_HANDLE; - unsigned int memory_offset, memory_tile_count, tiles_used; struct vkd3d_resource_tile_coordinate base_coordinate; struct d3d12_device *device = command_queue->device; D3D12_TILED_RESOURCE_COORDINATE coordinate_zero; VkSparseBufferMemoryBindInfo buffer_bind_info; + VkSparseImageMemoryBindInfo image_bind_info; D3D12_TILE_REGION_SIZE region_size_default; D3D12_TILED_RESOURCE_COORDINATE coordinate; D3D12_TILE_REGION_SIZE region_size; @@ -6710,12 +6856,6 @@ static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue unsigned int tile_count_all; VkResult vr;
- if (d3d12_resource_is_texture(resource)) - { - FIXME("Tiled textures are not implemented yet.\n"); - return; - } - if (region_count == 1) { if (!region_sizes) @@ -6747,6 +6887,7 @@ static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue if (!vkd3d_initialise_tile_region(&base_coordinate, &region_size, &coordinate, &region_sizes[0], resource)) return;
+ subresource = coordinate.Subresource; region_idx = 0; range_idx = 0; null_binding = false; @@ -6765,16 +6906,25 @@ static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue if (heap) vkd3d_mutex_lock(&heap->mutex);
- buffer_bind_info.buffer = resource->u.vk_buffer; buffer_bind_info.bindCount = 0; buffer_bind_info.pBinds = resource->tiles.bind_buffer; + image_bind_info.bindCount = 0; + image_bind_info.pBinds = resource->tiles.bind_buffer; + memset(&sparse_info, 0, sizeof(sparse_info)); sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; - sparse_info.bufferBindCount = 1; sparse_info.pBufferBinds = &buffer_bind_info; + sparse_info.pImageBinds = &image_bind_info;
do { + if (coordinate.Subresource != subresource) + { + if ((subresource = coordinate.Subresource) >= resource->tiles.subresource_count) + break; + d3d12_tile_region_size_set_entire_subresource(&region_size, resource, subresource); + } + if (range_flags) { cur_flags = range_flags[range_idx]; @@ -6824,8 +6974,22 @@ static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue if (heap) vkd3d_mutex_unlock(&heap->mutex);
- if (!buffer_bind_info.bindCount) + if (buffer_bind_info.bindCount) + { + buffer_bind_info.buffer = resource->u.vk_buffer; + sparse_info.bufferBindCount = 1; + sparse_info.pImageBinds = NULL; + } + else if (image_bind_info.bindCount) + { + image_bind_info.image = resource->u.vk_image; + sparse_info.imageBindCount = 1; + sparse_info.pBufferBinds = NULL; + } + else + { return; + }
if (have_unsupported_aliasing) FIXME("Aliased bindings are not implemented.\n"); diff --git a/tests/d3d12.c b/tests/d3d12.c index ee0bc1006..42dbb18b1 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -39114,7 +39114,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < j; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo_if(i < packed_mip_info.StartTileIndexInOverallResource) check_readback_data_uint(&rb.rb, &box, i + 1, 0); + check_readback_data_uint(&rb.rb, &box, i + 1, 0); }
release_resource_readback(&rb); @@ -39210,7 +39210,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < j; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo_if(i < packed_mip_info.StartTileIndexInOverallResource && texture_region_tiles[i]) + todo_if(i == 6 || i == 7 || i == 9 || i == 11 || i == 16) check_readback_data_uint(&rb.rb, &box, texture_region_tiles[i], 0); }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d/command.c | 5 +++-- libs/vkd3d/device.c | 1 + libs/vkd3d/resource.c | 4 ++++ libs/vkd3d/vkd3d_private.h | 1 + tests/d3d12.c | 4 ++-- 5 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index faa0c891b..19bed4501 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -6839,6 +6839,7 @@ static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue { const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; bool null_binding, aliased_binding, skip_binding, have_unsupported_aliasing; + bool can_alias = command_queue->device->vk_info.sparse_residency_aliased; unsigned int memory_offset, memory_tile_count, tiles_used, subresource; VkDeviceMemory vk_memory = heap ? heap->vk_memory : VK_NULL_HANDLE; struct vkd3d_resource_tile_coordinate base_coordinate; @@ -6931,7 +6932,7 @@ static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue null_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_NULL); skip_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_SKIP); aliased_binding = !!(cur_flags & D3D12_TILE_RANGE_FLAG_REUSE_SINGLE_TILE); - if (aliased_binding && !(null_binding || skip_binding)) + if (aliased_binding && !(null_binding || skip_binding) && !can_alias) { have_unsupported_aliasing = true; skip_binding = true; @@ -6992,7 +6993,7 @@ static void d3d12_command_queue_update_tile_mappings(struct d3d12_command_queue }
if (have_unsupported_aliasing) - FIXME("Aliased bindings are not implemented.\n"); + FIXME("Aliased bindings are not supported by the device.\n");
if (!vkd3d_queue_acquire(vkd3d_queue)) { diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 9e9047417..fab713212 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1521,6 +1521,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; vulkan_info->sparse_binding = features->sparseBinding; vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; + vulkan_info->sparse_residency_aliased = features->sparseResidencyAliased; vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index d88a4eafb..d2958b43e 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -671,6 +671,8 @@ HRESULT vkd3d_create_buffer(struct d3d12_device *device, buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT; if (device->vk_info.sparse_properties.residencyNonResidentStrict) buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; + if (device->vk_info.sparse_residency_aliased) + buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_ALIASED_BIT; d3d12_device_get_tiled_binding_queue(device); }
@@ -832,6 +834,8 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, image_info.flags |= VK_IMAGE_CREATE_SPARSE_BINDING_BIT; if (device->vk_info.sparse_properties.residencyNonResidentStrict) image_info.flags |= VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT; + if (device->vk_info.sparse_residency_aliased) + image_info.flags |= VK_IMAGE_CREATE_SPARSE_ALIASED_BIT; }
image_info.imageType = vk_image_type_from_d3d12_resource_dimension(desc->Dimension); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 0e3ea1411..3eae1f74e 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -157,6 +157,7 @@ struct vkd3d_vulkan_info VkPhysicalDeviceSparseProperties sparse_properties; bool sparse_binding; bool sparse_residency_3d; + bool sparse_residency_aliased;
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
diff --git a/tests/d3d12.c b/tests/d3d12.c index 42dbb18b1..b396bf18a 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -39014,7 +39014,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < ARRAY_SIZE(buffer_region_tiles); i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo_if((i >= region_offsets[0].X && i < region_offsets[0].X + region_sizes[0].NumTiles) || (i >= 24 && i <= 26)) + todo_if(i >= region_offsets[0].X && i < region_offsets[0].X + region_sizes[0].NumTiles) check_readback_data_uint(&rb.rb, &box, buffer_region_tiles[i], 0); }
@@ -39210,7 +39210,7 @@ static void test_update_tile_mappings(void) for (i = 0; i < j; i++) { set_box(&box, i, 0, 0, i + 1, 1, 1); - todo_if(i == 6 || i == 7 || i == 9 || i == 11 || i == 16) + todo_if(i == 7 || i == 11 || i == 16) check_readback_data_uint(&rb.rb, &box, texture_region_tiles[i], 0); }