-- v3: vkd3d-shader/tpf: Validate sysvals in index range declarations. vkd3d-shader/ir: Trim non-arrayable sysvals from the last element of an index range. tests: Test punned array access in patch constant functions.
From: Conor McCarthy cmccarthy@codeweavers.com
For example, this occurred in a shader:
reg_idx  write_mask
0        xyz
1        xyzw
2        xyzw
3        xyz
The dcl_indexrange instruction covered only xyz, so once merged, searching for xyzw failed. --- libs/vkd3d-shader/ir.c | 57 +++++++++++++++++++++++++++++++++++++++-- libs/vkd3d-shader/tpf.c | 7 ----- 2 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index d38b3c397..7a3c9d681 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -570,7 +570,7 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, unsigned int reg_idx, unsigned int write_mask) { - unsigned int i; + unsigned int i, base_write_mask;
for (i = 0; i < signature->element_count; ++i) { @@ -582,7 +582,14 @@ static unsigned int shader_signature_find_element_for_reg(const struct shader_si } }
- /* Validated in the TPF reader. */ + /* Validated in the TPF reader, but failure in signature_element_range_expand_mask() + * can land us here on an unmatched vector mask. */ + FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n", + reg_idx, write_mask); + base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask); + if (base_write_mask != write_mask) + return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask); + vkd3d_unreachable(); }
@@ -749,6 +756,51 @@ static int signature_element_index_compare(const void *a, const void *b) return vkd3d_u32_compare(e->sort_index, f->sort_index); }
+static unsigned int signature_element_range_expand_mask(struct signature_element *e, unsigned int register_count, + uint8_t range_map[][VKD3D_VEC4_SIZE]) +{ + unsigned int i, j, component_idx, component_count, merged_write_mask = e->mask; + + /* dcl_indexrange instructions can declare a subset of the full mask, and the masks of + * the elements within the range may differ. TPF's handling of arrayed inputs with + * dcl_indexrange is really just a hack. Here we create a mask which covers all element + * masks, and check for collisions with other ranges. */ + + for (i = 1; i < register_count; ++i) + merged_write_mask |= e[i].mask; + + if (merged_write_mask == e->mask) + return merged_write_mask; + + /* Reaching this point is very rare to begin with, and collisions are even rarer or + * impossible. If the latter shows up, the fallback in shader_signature_find_element_for_reg() + * may be sufficient. */ + + component_idx = vsir_write_mask_get_component_idx(e->mask); + component_count = vsir_write_mask_component_count(e->mask); + + for (i = e->register_index; i < e->register_index + register_count; ++i) + { + for (j = 0; j < component_idx; ++j) + if (range_map[i][j]) + break; + for (j = component_idx + component_count; j < VKD3D_VEC4_SIZE; ++j) + if (range_map[i][j]) + break; + } + + if (i == register_count) + { + WARN("Expanding mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask, + e->semantic_name, e->register_index, register_count); + return merged_write_mask; + } + + WARN("Cannot expand mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask, + e->semantic_name, e->register_index, register_count); + return e->mask; +} + static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE], bool is_patch_constant) { @@ -819,6 +871,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map { TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, 
e->register_index, register_count); e->register_count = register_count; + e->mask = signature_element_range_expand_mask(e, register_count, range_map); } } element_count = new_count; diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index f859e758d..3a3cc7d23 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -934,13 +934,6 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins register_count = index_range->register_count; write_mask = index_range->dst.write_mask;
- if (vsir_write_mask_component_count(write_mask) != 1) - { - WARN("Unhandled write mask %#x.\n", write_mask); - vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK, - "Index range mask %#x is not scalar.", write_mask); - } - switch ((type = index_range->dst.reg.type)) { case VKD3DSPR_INPUT:
From: Conor McCarthy cmccarthy@codeweavers.com
The FXC optimiser sometimes converts a local array of input values into direct array addressing of the inputs, which can result in a dcl_indexrange instruction spanning input elements with different masks. --- tests/d3d12.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 1126d9749..b84c58543 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -37495,6 +37495,256 @@ static void test_get_resource_tiling(void) destroy_test_context(&context); }
+static void test_hull_shader_punned_array(void) +{ + static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f}; + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc; + ID3D12GraphicsCommandList *command_list; + struct test_context_desc desc; + struct test_context context; + ID3D12CommandQueue *queue; + struct vec4 tess_factors; + HRESULT hr; + + static const DWORD hs_code[] = + { +#if 0 + float4 tess_factor; + + struct data + { + float3 f0 : FOO0; + float4 f1 : FOO1; + float3 f2 : FOO2; + float4 position : SV_Position; + }; + + struct patch_constant_data + { + float edges[3] : SV_TessFactor; + float inside : SV_InsideTessFactor; + float4 f : FOO; + }; + + void patch_constant(InputPatch<data, 3> input, out patch_constant_data output) + { + output.edges[0] = tess_factor.x; + output.edges[1] = tess_factor.y; + output.edges[2] = tess_factor.z; + output.inside = tess_factor.w; + /* Compiles into punned array access to f0-f3 using dcl_indexrange. */ + float f[3] = {input[0].f0.x, input[0].f1.x, input[0].f2.x}; + output.f = f[tess_factor.x]; + } + + [domain("tri")] + [outputcontrolpoints(3)] + [partitioning("integer")] + [outputtopology("triangle_cw")] + [patchconstantfunc("patch_constant")] + data hs_main(InputPatch<data, 3> input, uint i : SV_OutputControlPointID) + { + data result = input[i]; + /* Read a value with more components than the first element in dcl_indexrange. Without + * special handling, this would fail because the dcl_indexrange instruction uses only + * the mask from element 0. 
*/ + result.f2 = input[0].f1.yzw; + return result; + } + + [domain("tri")] + void ds_main(patch_constant_data input, + float3 tess_coord : SV_DomainLocation, + const OutputPatch<data, 3> patch, + out data output) + { + output.position = tess_coord.x * patch[0].position + + tess_coord.y * patch[1].position + + tess_coord.z * patch[2].position; + output.f0.x = tess_coord.x * patch[0].f0.x + tess_coord.y * patch[1].f0.x + tess_coord.z * patch[2].f0.x; + output.f0.y = tess_coord.x * patch[0].f0.y + tess_coord.y * patch[1].f0.y + tess_coord.z * patch[2].f0.y; + output.f0.z = tess_coord.x * patch[0].f0.z + tess_coord.y * patch[1].f0.z + tess_coord.z * patch[2].f0.z; + output.f1 = 0.0; + output.f2 = 0.0; + } + + void vs_main(uint id : SV_VertexID, out data output) + { + float2 coords = float2((id << 1) & 2, id & 2); + output.position = float4(coords * float2(2, -2) + float2(-1, 1), 0, 1); + output.f0 = float3(0.0, 1.0, 0.0); + output.f1 = float4(2.0, 3.0, 4.0, 5.0); + output.f2 = float3(6.0, 7.0, 8.0); + } + + float4 ps_main(data input) : sv_target + { + return float4(input.f0, 1.0); + } +#endif + 0x43425844, 0x6d775869, 0x25139e4c, 0x407bfb1e, 0xe239367e, 0x00000001, 0x00000660, 0x00000004, + 0x00000030, 0x000000b0, 0x00000130, 0x000001e0, 0x4e475349, 0x00000078, 0x00000004, 0x00000008, + 0x00000068, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000707, 0x00000068, 0x00000001, + 0x00000000, 0x00000003, 0x00000001, 0x00000f0f, 0x00000068, 0x00000002, 0x00000000, 0x00000003, + 0x00000002, 0x00000107, 0x0000006c, 0x00000000, 0x00000001, 0x00000003, 0x00000003, 0x00000f0f, + 0x004f4f46, 0x505f5653, 0x7469736f, 0x006e6f69, 0x4e47534f, 0x00000078, 0x00000004, 0x00000008, + 0x00000068, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000807, 0x00000068, 0x00000001, + 0x00000000, 0x00000003, 0x00000001, 0x0000000f, 0x00000068, 0x00000002, 0x00000000, 0x00000003, + 0x00000002, 0x00000807, 0x0000006c, 0x00000000, 0x00000001, 0x00000003, 0x00000003, 0x0000000f, + 
0x004f4f46, 0x505f5653, 0x7469736f, 0x006e6f69, 0x47534350, 0x000000a8, 0x00000005, 0x00000008, + 0x00000080, 0x00000000, 0x0000000d, 0x00000003, 0x00000000, 0x00000e01, 0x00000080, 0x00000001, + 0x0000000d, 0x00000003, 0x00000001, 0x00000e01, 0x00000080, 0x00000002, 0x0000000d, 0x00000003, + 0x00000002, 0x00000e01, 0x0000008e, 0x00000000, 0x0000000e, 0x00000003, 0x00000003, 0x00000e01, + 0x000000a2, 0x00000000, 0x00000000, 0x00000003, 0x00000004, 0x0000000f, 0x545f5653, 0x46737365, + 0x6f746361, 0x56530072, 0x736e495f, 0x54656469, 0x46737365, 0x6f746361, 0x4f460072, 0xabab004f, + 0x58454853, 0x00000478, 0x00030050, 0x0000011e, 0x01000071, 0x01001893, 0x01001894, 0x01001095, + 0x01000896, 0x01001897, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x01000072, + 0x0200005f, 0x00016000, 0x0400005f, 0x00201072, 0x00000003, 0x00000000, 0x0400005f, 0x002010f2, + 0x00000003, 0x00000001, 0x0400005f, 0x002010f2, 0x00000003, 0x00000003, 0x03000065, 0x00102072, + 0x00000000, 0x03000065, 0x001020f2, 0x00000001, 0x03000065, 0x00102072, 0x00000002, 0x03000065, + 0x001020f2, 0x00000003, 0x02000068, 0x00000001, 0x04000036, 0x00100012, 0x00000000, 0x00016001, + 0x07000036, 0x00102072, 0x00000000, 0x00a01246, 0x0010000a, 0x00000000, 0x00000000, 0x07000036, + 0x001020f2, 0x00000001, 0x00a01e46, 0x0010000a, 0x00000000, 0x00000001, 0x07000036, 0x001020f2, + 0x00000003, 0x00a01e46, 0x0010000a, 0x00000000, 0x00000003, 0x06000036, 0x00102072, 0x00000002, + 0x00201796, 0x00000000, 0x00000001, 0x0100003e, 0x01000073, 0x04000067, 0x00102012, 0x00000000, + 0x00000011, 0x06000036, 0x00102012, 0x00000000, 0x0020800a, 0x00000000, 0x00000000, 0x0100003e, + 0x01000073, 0x04000067, 0x00102012, 0x00000001, 0x00000012, 0x06000036, 0x00102012, 0x00000001, + 0x0020801a, 0x00000000, 0x00000000, 0x0100003e, 0x01000073, 0x04000067, 0x00102012, 0x00000002, + 0x00000013, 0x06000036, 0x00102012, 0x00000002, 0x0020802a, 0x00000000, 0x00000000, 0x0100003e, + 0x01000073, 0x04000067, 0x00102012, 
0x00000003, 0x00000014, 0x06000036, 0x00102012, 0x00000003, + 0x0020803a, 0x00000000, 0x00000000, 0x0100003e, 0x01000073, 0x0400005f, 0x00219012, 0x00000003, + 0x00000000, 0x0400005f, 0x00219012, 0x00000003, 0x00000001, 0x0400005f, 0x00219012, 0x00000003, + 0x00000002, 0x0400005f, 0x00219012, 0x00000003, 0x00000003, 0x03000065, 0x00102012, 0x00000004, + 0x02000068, 0x00000001, 0x0500005b, 0x00219012, 0x00000003, 0x00000000, 0x00000004, 0x0600001c, + 0x00100012, 0x00000000, 0x0020800a, 0x00000000, 0x00000000, 0x07000036, 0x00102012, 0x00000004, + 0x0421900a, 0x00000000, 0x0010000a, 0x00000000, 0x0100003e, 0x01000073, 0x0400005f, 0x00219012, + 0x00000003, 0x00000000, 0x0400005f, 0x00219012, 0x00000003, 0x00000001, 0x0400005f, 0x00219012, + 0x00000003, 0x00000002, 0x0400005f, 0x00219012, 0x00000003, 0x00000003, 0x03000065, 0x00102022, + 0x00000004, 0x02000068, 0x00000001, 0x0500005b, 0x00219012, 0x00000003, 0x00000000, 0x00000004, + 0x0600001c, 0x00100012, 0x00000000, 0x0020800a, 0x00000000, 0x00000000, 0x07000036, 0x00102022, + 0x00000004, 0x0421900a, 0x00000000, 0x0010000a, 0x00000000, 0x0100003e, 0x01000073, 0x0400005f, + 0x00219012, 0x00000003, 0x00000000, 0x0400005f, 0x00219012, 0x00000003, 0x00000001, 0x0400005f, + 0x00219012, 0x00000003, 0x00000002, 0x0400005f, 0x00219012, 0x00000003, 0x00000003, 0x03000065, + 0x00102042, 0x00000004, 0x02000068, 0x00000001, 0x0500005b, 0x00219012, 0x00000003, 0x00000000, + 0x00000004, 0x0600001c, 0x00100012, 0x00000000, 0x0020800a, 0x00000000, 0x00000000, 0x07000036, + 0x00102042, 0x00000004, 0x0421900a, 0x00000000, 0x0010000a, 0x00000000, 0x0100003e, 0x01000073, + 0x0400005f, 0x00219012, 0x00000003, 0x00000000, 0x0400005f, 0x00219012, 0x00000003, 0x00000001, + 0x0400005f, 0x00219012, 0x00000003, 0x00000002, 0x0400005f, 0x00219012, 0x00000003, 0x00000003, + 0x03000065, 0x00102082, 0x00000004, 0x02000068, 0x00000001, 0x0500005b, 0x00219012, 0x00000003, + 0x00000000, 0x00000004, 0x0600001c, 0x00100012, 0x00000000, 0x0020800a, 
0x00000000, 0x00000000, + 0x07000036, 0x00102082, 0x00000004, 0x0421900a, 0x00000000, 0x0010000a, 0x00000000, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE hs = {hs_code, sizeof(hs_code)}; + static const DWORD ds_code[] = + { + 0x43425844, 0x9c50437f, 0xe21aebbd, 0x29bff97d, 0xa7808efa, 0x00000001, 0x0000036c, 0x00000004, + 0x00000030, 0x000000b0, 0x00000160, 0x000001e0, 0x4e475349, 0x00000078, 0x00000004, 0x00000008, + 0x00000068, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000707, 0x00000068, 0x00000001, + 0x00000000, 0x00000003, 0x00000001, 0x0000000f, 0x00000068, 0x00000002, 0x00000000, 0x00000003, + 0x00000002, 0x00000007, 0x0000006c, 0x00000000, 0x00000001, 0x00000003, 0x00000003, 0x00000f0f, + 0x004f4f46, 0x505f5653, 0x7469736f, 0x006e6f69, 0x47534350, 0x000000a8, 0x00000005, 0x00000008, + 0x00000080, 0x00000000, 0x0000000d, 0x00000003, 0x00000000, 0x00000001, 0x00000080, 0x00000001, + 0x0000000d, 0x00000003, 0x00000001, 0x00000001, 0x00000080, 0x00000002, 0x0000000d, 0x00000003, + 0x00000002, 0x00000001, 0x0000008e, 0x00000000, 0x0000000e, 0x00000003, 0x00000003, 0x00000001, + 0x000000a2, 0x00000000, 0x00000000, 0x00000003, 0x00000004, 0x0000000f, 0x545f5653, 0x46737365, + 0x6f746361, 0x56530072, 0x736e495f, 0x54656469, 0x46737365, 0x6f746361, 0x4f460072, 0xabab004f, + 0x4e47534f, 0x00000078, 0x00000004, 0x00000008, 0x00000068, 0x00000000, 0x00000000, 0x00000003, + 0x00000000, 0x00000807, 0x00000068, 0x00000001, 0x00000000, 0x00000003, 0x00000001, 0x0000000f, + 0x00000068, 0x00000002, 0x00000000, 0x00000003, 0x00000002, 0x00000807, 0x0000006c, 0x00000000, + 0x00000001, 0x00000003, 0x00000003, 0x0000000f, 0x004f4f46, 0x505f5653, 0x7469736f, 0x006e6f69, + 0x58454853, 0x00000184, 0x00040050, 0x00000061, 0x01001893, 0x01001095, 0x0100086a, 0x0200005f, + 0x0001c072, 0x0400005f, 0x00219072, 0x00000003, 0x00000000, 0x0400005f, 0x002190f2, 0x00000003, + 0x00000003, 0x03000065, 0x00102072, 0x00000000, 0x03000065, 0x001020f2, 0x00000001, 0x03000065, + 
0x00102072, 0x00000002, 0x04000067, 0x001020f2, 0x00000003, 0x00000001, 0x02000068, 0x00000001, + 0x07000038, 0x00100072, 0x00000000, 0x0001c556, 0x00219246, 0x00000001, 0x00000000, 0x09000032, + 0x00100072, 0x00000000, 0x0001c006, 0x00219246, 0x00000000, 0x00000000, 0x00100246, 0x00000000, + 0x09000032, 0x00102072, 0x00000000, 0x0001caa6, 0x00219246, 0x00000002, 0x00000000, 0x00100246, + 0x00000000, 0x08000036, 0x001020f2, 0x00000001, 0x00004002, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x08000036, 0x00102072, 0x00000002, 0x00004002, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x07000038, 0x001000f2, 0x00000000, 0x0001c556, 0x00219e46, 0x00000001, 0x00000003, + 0x09000032, 0x001000f2, 0x00000000, 0x0001c006, 0x00219e46, 0x00000000, 0x00000003, 0x00100e46, + 0x00000000, 0x09000032, 0x001020f2, 0x00000003, 0x0001caa6, 0x00219e46, 0x00000002, 0x00000003, + 0x00100e46, 0x00000000, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE ds = {ds_code, sizeof(ds_code)}; + static const DWORD vs_code[] = + { + 0x43425844, 0xa781de9a, 0xd9cd6837, 0xd70ee110, 0xba5ecac2, 0x00000001, 0x0000025c, 0x00000003, + 0x0000002c, 0x00000060, 0x000000e0, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, + 0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978, + 0x4e47534f, 0x00000078, 0x00000004, 0x00000008, 0x00000068, 0x00000000, 0x00000000, 0x00000003, + 0x00000000, 0x00000807, 0x00000068, 0x00000001, 0x00000000, 0x00000003, 0x00000001, 0x0000000f, + 0x00000068, 0x00000002, 0x00000000, 0x00000003, 0x00000002, 0x00000807, 0x0000006c, 0x00000000, + 0x00000001, 0x00000003, 0x00000003, 0x0000000f, 0x004f4f46, 0x505f5653, 0x7469736f, 0x006e6f69, + 0x58454853, 0x00000174, 0x00010050, 0x0000005d, 0x0100086a, 0x04000060, 0x00101012, 0x00000000, + 0x00000006, 0x03000065, 0x00102072, 0x00000000, 0x03000065, 0x001020f2, 0x00000001, 0x03000065, + 0x00102072, 0x00000002, 0x04000067, 0x001020f2, 0x00000003, 0x00000001, 0x02000068, 
0x00000001, + 0x08000036, 0x00102072, 0x00000000, 0x00004002, 0x00000000, 0x3f800000, 0x00000000, 0x00000000, + 0x08000036, 0x001020f2, 0x00000001, 0x00004002, 0x40000000, 0x40400000, 0x40800000, 0x40a00000, + 0x08000036, 0x00102072, 0x00000002, 0x00004002, 0x40c00000, 0x40e00000, 0x41000000, 0x00000000, + 0x0b00008c, 0x00100012, 0x00000000, 0x00004001, 0x00000001, 0x00004001, 0x00000001, 0x0010100a, + 0x00000000, 0x00004001, 0x00000000, 0x07000001, 0x00100042, 0x00000000, 0x0010100a, 0x00000000, + 0x00004001, 0x00000002, 0x05000056, 0x00100032, 0x00000000, 0x00100086, 0x00000000, 0x0f000032, + 0x00102032, 0x00000003, 0x00100046, 0x00000000, 0x00004002, 0x40000000, 0xc0000000, 0x00000000, + 0x00000000, 0x00004002, 0xbf800000, 0x3f800000, 0x00000000, 0x00000000, 0x08000036, 0x001020c2, + 0x00000003, 0x00004002, 0x00000000, 0x00000000, 0x00000000, 0x3f800000, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE vs = {vs_code, sizeof(vs_code)}; + static const DWORD ps_code[] = + { + 0x43425844, 0x586f8b9c, 0x6ca70cc8, 0x2aa24884, 0xae111d8c, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x000000ac, 0x000000e0, 0x4e475349, 0x00000078, 0x00000004, 0x00000008, 0x00000068, + 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000707, 0x00000068, 0x00000001, 0x00000000, + 0x00000003, 0x00000001, 0x0000000f, 0x00000068, 0x00000002, 0x00000000, 0x00000003, 0x00000002, + 0x00000007, 0x0000006c, 0x00000000, 0x00000001, 0x00000003, 0x00000003, 0x0000000f, 0x004f4f46, + 0x505f5653, 0x7469736f, 0x006e6f69, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, + 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x0000000f, 0x745f7673, 0x65677261, 0xabab0074, + 0x58454853, 0x00000050, 0x00000050, 0x00000014, 0x0100086a, 0x03001062, 0x00101072, 0x00000000, + 0x03000065, 0x001020f2, 0x00000000, 0x05000036, 0x00102072, 0x00000000, 0x00101246, 0x00000000, + 0x05000036, 0x00102082, 0x00000000, 0x00004001, 0x3f800000, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE ps = 
{ps_code, sizeof(ps_code)}; + + memset(&desc, 0, sizeof(desc)); + desc.no_root_signature = true; + if (!init_test_context(&context, &desc)) + return; + command_list = context.list; + queue = context.queue; + + context.root_signature = create_32bit_constants_root_signature(context.device, + 0, 4, D3D12_SHADER_VISIBILITY_HULL); + + init_pipeline_state_desc(&pso_desc, context.root_signature, + context.render_target_desc.Format, &vs, &ps, NULL); + pso_desc.HS = hs; + pso_desc.DS = ds; + pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH; + hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, + &IID_ID3D12PipelineState, (void **)&context.pipeline_state); + ok(hr == S_OK, "Failed to create state, hr %#x.\n", hr); + + ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); + + ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST); + ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); + tess_factors.x = tess_factors.y = tess_factors.z = tess_factors.w = 1.0f; + ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &tess_factors.x, 0); + ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0); + + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + + todo + check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0); + + destroy_test_context(&context); +} + START_TEST(d3d12) { parse_args(argc, argv); @@ -37676,4 +37926,5 @@ 
START_TEST(d3d12) run_test(test_readback_map_stability); run_test(test_vs_ps_relative_addressing); run_test(test_get_resource_tiling); + run_test(test_hull_shader_punned_array); }
From: Conor McCarthy cmccarthy@codeweavers.com
FXC may erroneously emit a dcl_indexrange beyond the bounds of the addressed range. If this includes a sysval, it will be erroneously included in the arrayed signature element. We could trim more than one from the end, but it is very rare to have even one. A sysval at index 0 is a more complex case, which is not handled. --- libs/vkd3d-shader/ir.c | 13 ++++++++++++- tests/d3d12.c | 1 - 2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 7a3c9d681..b63fcab53 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -650,10 +650,15 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, { const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; const struct vkd3d_shader_register *reg = &range->dst.reg; + unsigned int register_count = range->register_count; + enum vkd3d_shader_sysval_semantic sysval_semantic; unsigned int reg_idx, write_mask, element_idx; const struct shader_signature *signature; uint8_t (*range_map)[VKD3D_VEC4_SIZE];
+ if (!register_count) + return; + switch (reg->type) { case VKD3DSPR_INPUT: @@ -685,7 +690,13 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, reg_idx = reg->idx[reg->idx_count - 1].offset; write_mask = range->dst.write_mask; element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); - range_map_set_register_range(range_map, reg_idx, range->register_count, + sysval_semantic = signature->elements[element_idx + register_count - 1].sysval_semantic; + /* Trim the last element if it's a non-arrayable sysval. FXC seems to forbid actual addressing + * of such a sysval, but it can erroneously include it in a dcl_indexrange. */ + register_count -= sysval_semantic && !vsir_sysval_semantic_is_tess_factor(sysval_semantic) + && !vsir_sysval_semantic_is_clip_cull(sysval_semantic); + + range_map_set_register_range(range_map, reg_idx, register_count, signature->elements[element_idx].mask, true); }
diff --git a/tests/d3d12.c b/tests/d3d12.c index b84c58543..ec6e065c0 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -37739,7 +37739,6 @@ static void test_hull_shader_punned_array(void) transition_resource_state(command_list, context.render_target, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
- todo check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0);
destroy_test_context(&context);
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/tpf.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 3a3cc7d23..bb2076b42 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -921,6 +921,7 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins { struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; unsigned int i, register_idx, register_count; + const struct shader_signature *signature; enum vkd3d_shader_register_type type; struct sm4_index_range_array *ranges; unsigned int *io_masks; @@ -940,27 +941,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins case VKD3DSPR_INCONTROLPOINT: io_masks = priv->input_register_masks; ranges = &priv->input_index_ranges; + signature = &priv->p.shader_desc.input_signature; break; case VKD3DSPR_OUTPUT: if (sm4_parser_is_in_fork_or_join_phase(priv)) { io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; + signature = &priv->p.shader_desc.patch_constant_signature; } else { io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; + signature = &priv->p.shader_desc.output_signature; } break; case VKD3DSPR_COLOROUT: case VKD3DSPR_OUTCONTROLPOINT: io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; + signature = &priv->p.shader_desc.output_signature; break; case VKD3DSPR_PATCHCONST: io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; + signature = &priv->p.shader_desc.patch_constant_signature; break;
default: @@ -998,6 +1004,19 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins
for (i = 0; i < register_count; ++i) { + const struct signature_element *e = vsir_signature_find_element_for_reg(signature, register_idx + i, write_mask); + /* Index ranges should not contain non-arrayed sysvals. FXC tries to forbid this but it is buggy, + * and sometimes emits a range containing a sysval where the sysval is not actually accessed. + * We currently trim one off the end in the IR. */ + if (e && e->sysval_semantic && i < register_count - 1 && !vsir_sysval_semantic_is_tess_factor(e->sysval_semantic) + && !vsir_sysval_semantic_is_clip_cull(e->sysval_semantic)) + { + WARN("Sysval %u included in an index range declaration.\n", e->sysval_semantic); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Index range base %u, count %u, mask %#x contains sysval %u.", + register_idx, register_count, write_mask, e->sysval_semantic); + return; + } if ((io_masks[register_idx + i] & write_mask) != write_mask) { WARN("No matching declaration for index range base %u, count %u, mask %#x.\n",
This turns out to be quite messy and results from FXC weirdness. I ran into a new problem with the test: `dcl_indexrange` exceeds its required bounds and sweeps up `SV_Position` into the array. The patch for this may be too hacky for upstream, since it's not known to occur in the wild. In the test I tried to avoid it by using `SV_Position` only in the DS and PS, but it failed to render. I changed `struct data` to `struct ds_data` and declared a new `struct data` containing `POSITION` instead, and had `ds_main` output a `ds_data`, and `ps_main` receive one. I don't know why that would be a problem so maybe I misunderstand how this should work.
FXC is unpredictable and broken on the hull shader. For example, if I substitute `output.inside += f[tess_factor.x]` for `output.f = f[tess_factor.x]` it emits an error for dynamic indexing of `SV_Position`, but this doesn't happen for `output.edges[0] += f[tess_factor.x]`.
On Tue Jan 9 09:17:19 2024 +0000, Conor McCarthy wrote:
This turns out to be quite messy and results from FXC weirdness. I ran into a new problem with the test: `dcl_indexrange` exceeds its required bounds and sweeps up `SV_Position` into the array. The patch for this may be too hacky for upstream, since it's not known to occur in the wild. In the test I tried to avoid it by using `SV_Position` only in the DS and PS, but it failed to render. I changed `struct data` to `struct ds_data` and declared a new `struct data` containing `POSITION` instead, and had `ds_main` output a `ds_data`, and `ps_main` receive one. I don't know why that would be a problem so maybe I misunderstand how this should work.
I need to pass custom `POSITION` input elements to vs_main. That should fix it to work without `SV_Position`.