Goes atop !537.
-- v4:
- tests/d3d12: Test multiple clip distance inputs in test_clip_distance().
- tests/d3d12: Use 5 clip distances for the multiple test in test_clip_distance().
- vkd3d-shader/ir: Transform clip/cull outputs and patch constants into arrays.
- vkd3d-shader/ir: Transform clip/cull inputs into an array.
From: Conor McCarthy cmccarthy@codeweavers.com
For example, this occurred in a shader:
| reg_idx | write_mask |
|---------|------------|
| 0       | xyz        |
| 1       | xyzw       |
| 2       | xyzw       |
| 3       | xyz        |
The dcl_indexrange instruction covered only the xyz mask, so once the elements were merged, searching for a signature element with mask xyzw failed.
It is impossible to declare an input array where elements have different component counts, but the optimiser can create this case. One way for this to occur is to dynamically index input values via a local array containing copies of the input values. The optimiser converts this to dynamically indexed inputs. --- libs/vkd3d-shader/ir.c | 57 +++++++++++++++++++++++++++++++++++++++-- libs/vkd3d-shader/tpf.c | 7 ----- 2 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 28c7d1584..d6ff42ab2 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -571,7 +571,7 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, unsigned int reg_idx, unsigned int write_mask) { - unsigned int i; + unsigned int i, base_write_mask;
for (i = 0; i < signature->element_count; ++i) { @@ -583,7 +583,14 @@ static unsigned int shader_signature_find_element_for_reg(const struct shader_si } }
- /* Validated in the TPF reader. */ + /* Validated in the TPF reader, but failure in signature_element_range_expand_mask() + * can land us here on an unmatched vector mask. */ + FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n", + reg_idx, write_mask); + base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask); + if (base_write_mask != write_mask) + return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask); + vkd3d_unreachable(); }
@@ -750,6 +757,51 @@ static int signature_element_index_compare(const void *a, const void *b) return vkd3d_u32_compare(e->sort_index, f->sort_index); }
+static unsigned int signature_element_range_expand_mask(struct signature_element *e, unsigned int register_count, + uint8_t range_map[][VKD3D_VEC4_SIZE]) +{ + unsigned int i, j, component_idx, component_count, merged_write_mask = e->mask; + + /* dcl_indexrange instructions can declare a subset of the full mask, and the masks of + * the elements within the range may differ. TPF's handling of arrayed inputs with + * dcl_indexrange is really just a hack. Here we create a mask which covers all element + * masks, and check for collisions with other ranges. */ + + for (i = 1; i < register_count; ++i) + merged_write_mask |= e[i].mask; + + if (merged_write_mask == e->mask) + return merged_write_mask; + + /* Reaching this point is very rare to begin with, and collisions are even rarer or + * impossible. If the latter shows up, the fallback in shader_signature_find_element_for_reg() + * may be sufficient. */ + + component_idx = vsir_write_mask_get_component_idx(e->mask); + component_count = vsir_write_mask_component_count(e->mask); + + for (i = e->register_index; i < e->register_index + register_count; ++i) + { + for (j = 0; j < component_idx; ++j) + if (range_map[i][j]) + break; + for (j = component_idx + component_count; j < VKD3D_VEC4_SIZE; ++j) + if (range_map[i][j]) + break; + } + + if (i == register_count) + { + WARN("Expanding mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask, + e->semantic_name, e->register_index, register_count); + return merged_write_mask; + } + + WARN("Cannot expand mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask, + e->semantic_name, e->register_index, register_count); + return e->mask; +} + static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE], bool is_patch_constant) { @@ -820,6 +872,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map { TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, 
e->register_index, register_count); e->register_count = register_count; + e->mask = signature_element_range_expand_mask(e, register_count, range_map); } } element_count = new_count; diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 1f1944bc0..587446556 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -932,13 +932,6 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins register_count = index_range->register_count; write_mask = index_range->dst.write_mask;
- if (vsir_write_mask_component_count(write_mask) != 1) - { - WARN("Unhandled write mask %#x.\n", write_mask); - vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK, - "Index range mask %#x is not scalar.", write_mask); - } - switch ((type = index_range->dst.reg.type)) { case VKD3DSPR_INPUT:
From: Conor McCarthy cmccarthy@codeweavers.com
The FXC optimiser sometimes converts a local array of input values into direct array addressing of the inputs, which can result in a dcl_indexrange instruction spanning input elements with different masks. --- tests/d3d12.c | 262 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 0edad708e..9154db138 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -37567,6 +37567,267 @@ static void test_get_resource_tiling(void) destroy_test_context(&context); }
+static void test_hull_shader_punned_array(void) +{ + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc; + ID3D12GraphicsCommandList *command_list; + struct test_context_desc desc; + struct test_context context; + ID3D12CommandQueue *queue; + HRESULT hr; + + static const DWORD hs_code[] = + { +#if 0 + float4 tess_factor; + uint u; + + struct patch_in_data + { + float3 f0 : FOO0; + float4 f1 : FOO1; + float3 f2 : FOO2; + float4 position : P; + }; + + struct patch_out_data + { + float4 position : SV_Position; + float3 f0 : FOO0; + float4 f1 : FOO1; + float3 f2 : FOO2; + }; + + struct patch_constant_data + { + float edges[3] : SV_TessFactor; + float inside : SV_InsideTessFactor; + float3 f : FOO; + }; + + void patch_constant(InputPatch<patch_in_data, 3> input, out patch_constant_data output) + { + output.edges[0] = tess_factor.x; + output.edges[1] = tess_factor.y; + output.edges[2] = tess_factor.z; + output.inside = tess_factor.w; + /* Compiles into punned array access to f0-f3 using dcl_indexrange. */ + float3 f[3] = {input[0].f0, input[0].f1.xyz, input[0].f2}; + output.f = f[u]; + } + + [domain("tri")] + [outputcontrolpoints(3)] + [partitioning("integer")] + [outputtopology("triangle_cw")] + [patchconstantfunc("patch_constant")] + patch_out_data hs_main(InputPatch<patch_in_data, 3> input, uint i : SV_OutputControlPointID) + { + patch_out_data result; + result.position = input[i].position; + result.f0 = input[i].f0; + result.f1 = input[i].f1; + /* Read a value with more components than the first element in dcl_indexrange. Without + * special handling, this would fail because the dcl_indexrange instruction uses only + * the mask from element 0. 
*/ + result.f2 = input[0].f1.yzw; + return result; + } + + [domain("tri")] + void ds_main(patch_constant_data input, + float3 tess_coord : SV_DomainLocation, + const OutputPatch<patch_out_data, 3> patch, + out patch_out_data output) + { + output.position = tess_coord.x * patch[0].position + + tess_coord.y * patch[1].position + + tess_coord.z * patch[2].position; + output.f0.x = tess_coord.x * patch[0].f0.x + tess_coord.y * patch[1].f0.x + tess_coord.z * patch[2].f0.x; + output.f0.y = tess_coord.x * patch[0].f0.y + tess_coord.y * patch[1].f0.y + tess_coord.z * patch[2].f0.y; + output.f0.z = tess_coord.x * patch[0].f0.z + tess_coord.y * patch[1].f0.z + tess_coord.z * patch[2].f0.z; + output.f1 = float4(input.f, 0.0); + output.f2 = patch[0].f2; + } + + void vs_main(uint id : SV_VertexID, out patch_in_data output) + { + float2 coords = float2((id << 1) & 2, id & 2); + output.position = float4(coords * float2(2, -2) + float2(-1, 1), 0, 1); + output.f0 = float3(0.2, 0.4, 0.1); + output.f1 = float4(0.6, 0.8, 0.3, 0.5); + output.f2 = float3(0.7, 0.9, 1.0); + } + + float4 ps_main(patch_out_data input) : sv_target + { + return float4(input.f0.xy, input.f1.x, input.f2.x); + } +#endif + 0x43425844, 0xca86855e, 0xb4676233, 0xb42762ad, 0x825dfc5f, 0x00000001, 0x000005b4, 0x00000004, + 0x00000030, 0x000000a8, 0x00000128, 0x000001d8, 0x4e475349, 0x00000070, 0x00000004, 0x00000008, + 0x00000068, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000707, 0x00000068, 0x00000001, + 0x00000000, 0x00000003, 0x00000001, 0x00000f0f, 0x00000068, 0x00000002, 0x00000000, 0x00000003, + 0x00000002, 0x00000707, 0x0000006c, 0x00000000, 0x00000000, 0x00000003, 0x00000003, 0x00000f0f, + 0x004f4f46, 0xabab0050, 0x4e47534f, 0x00000078, 0x00000004, 0x00000008, 0x00000068, 0x00000000, + 0x00000001, 0x00000003, 0x00000000, 0x0000000f, 0x00000074, 0x00000000, 0x00000000, 0x00000003, + 0x00000001, 0x00000807, 0x00000074, 0x00000001, 0x00000000, 0x00000003, 0x00000002, 0x0000000f, + 0x00000074, 
0x00000002, 0x00000000, 0x00000003, 0x00000003, 0x00000807, 0x505f5653, 0x7469736f, + 0x006e6f69, 0x004f4f46, 0x47534350, 0x000000a8, 0x00000005, 0x00000008, 0x00000080, 0x00000000, + 0x0000000d, 0x00000003, 0x00000000, 0x00000e01, 0x0000008e, 0x00000000, 0x00000000, 0x00000003, + 0x00000000, 0x0000010e, 0x00000080, 0x00000001, 0x0000000d, 0x00000003, 0x00000001, 0x00000e01, + 0x00000080, 0x00000002, 0x0000000d, 0x00000003, 0x00000002, 0x00000e01, 0x00000092, 0x00000000, + 0x0000000e, 0x00000003, 0x00000003, 0x00000e01, 0x545f5653, 0x46737365, 0x6f746361, 0x4f460072, + 0x5653004f, 0x736e495f, 0x54656469, 0x46737365, 0x6f746361, 0xabab0072, 0x58454853, 0x000003d4, + 0x00030050, 0x000000f5, 0x01000071, 0x01001893, 0x01001894, 0x01001095, 0x01000896, 0x01001897, + 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x01000072, 0x0200005f, 0x00016000, + 0x0400005f, 0x00201072, 0x00000003, 0x00000000, 0x0400005f, 0x002010f2, 0x00000003, 0x00000001, + 0x0400005f, 0x002010f2, 0x00000003, 0x00000003, 0x03000065, 0x001020f2, 0x00000000, 0x03000065, + 0x00102072, 0x00000001, 0x03000065, 0x001020f2, 0x00000002, 0x03000065, 0x00102072, 0x00000003, + 0x02000068, 0x00000001, 0x04000036, 0x00100012, 0x00000000, 0x00016001, 0x07000036, 0x001020f2, + 0x00000000, 0x00a01e46, 0x0010000a, 0x00000000, 0x00000003, 0x07000036, 0x00102072, 0x00000001, + 0x00a01246, 0x0010000a, 0x00000000, 0x00000000, 0x07000036, 0x001020f2, 0x00000002, 0x00a01e46, + 0x0010000a, 0x00000000, 0x00000001, 0x06000036, 0x00102072, 0x00000003, 0x00201796, 0x00000000, + 0x00000001, 0x0100003e, 0x01000073, 0x04000067, 0x00102012, 0x00000000, 0x00000011, 0x06000036, + 0x00102012, 0x00000000, 0x0020800a, 0x00000000, 0x00000000, 0x0100003e, 0x01000073, 0x04000067, + 0x00102012, 0x00000001, 0x00000012, 0x06000036, 0x00102012, 0x00000001, 0x0020801a, 0x00000000, + 0x00000000, 0x0100003e, 0x01000073, 0x04000067, 0x00102012, 0x00000002, 0x00000013, 0x06000036, + 0x00102012, 0x00000002, 0x0020802a, 0x00000000, 
0x00000000, 0x0100003e, 0x01000073, 0x04000067, + 0x00102012, 0x00000003, 0x00000014, 0x06000036, 0x00102012, 0x00000003, 0x0020803a, 0x00000000, + 0x00000000, 0x0100003e, 0x01000073, 0x0400005f, 0x00219012, 0x00000003, 0x00000000, 0x0400005f, + 0x00219012, 0x00000003, 0x00000001, 0x0400005f, 0x00219012, 0x00000003, 0x00000002, 0x0400005f, + 0x00219012, 0x00000003, 0x00000003, 0x03000065, 0x00102022, 0x00000000, 0x02000068, 0x00000001, + 0x0500005b, 0x00219012, 0x00000003, 0x00000000, 0x00000004, 0x06000036, 0x00100012, 0x00000000, + 0x0020800a, 0x00000000, 0x00000001, 0x07000036, 0x00102022, 0x00000000, 0x0421900a, 0x00000000, + 0x0010000a, 0x00000000, 0x0100003e, 0x01000073, 0x0400005f, 0x00219022, 0x00000003, 0x00000000, + 0x0400005f, 0x00219022, 0x00000003, 0x00000001, 0x0400005f, 0x00219022, 0x00000003, 0x00000002, + 0x0400005f, 0x00219022, 0x00000003, 0x00000003, 0x03000065, 0x00102042, 0x00000000, 0x02000068, + 0x00000001, 0x0500005b, 0x00219022, 0x00000003, 0x00000000, 0x00000004, 0x06000036, 0x00100012, + 0x00000000, 0x0020800a, 0x00000000, 0x00000001, 0x07000036, 0x00102042, 0x00000000, 0x0421901a, + 0x00000000, 0x0010000a, 0x00000000, 0x0100003e, 0x01000073, 0x0400005f, 0x00219042, 0x00000003, + 0x00000000, 0x0400005f, 0x00219042, 0x00000003, 0x00000001, 0x0400005f, 0x00219042, 0x00000003, + 0x00000002, 0x0400005f, 0x00219042, 0x00000003, 0x00000003, 0x03000065, 0x00102082, 0x00000000, + 0x02000068, 0x00000001, 0x0500005b, 0x00219042, 0x00000003, 0x00000000, 0x00000004, 0x06000036, + 0x00100012, 0x00000000, 0x0020800a, 0x00000000, 0x00000001, 0x07000036, 0x00102082, 0x00000000, + 0x0421902a, 0x00000000, 0x0010000a, 0x00000000, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE hs = {hs_code, sizeof(hs_code)}; + static const DWORD ds_code[] = + { + 0x43425844, 0x25772b56, 0xf9f25be8, 0xf1e02cc3, 0xb95e5380, 0x00000001, 0x00000388, 0x00000004, + 0x00000030, 0x000000b0, 0x00000160, 0x000001e0, 0x4e475349, 0x00000078, 0x00000004, 0x00000008, + 0x00000068, 
0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x00000f0f, 0x00000074, 0x00000000, + 0x00000000, 0x00000003, 0x00000001, 0x00000707, 0x00000074, 0x00000001, 0x00000000, 0x00000003, + 0x00000002, 0x0000000f, 0x00000074, 0x00000002, 0x00000000, 0x00000003, 0x00000003, 0x00000707, + 0x505f5653, 0x7469736f, 0x006e6f69, 0x004f4f46, 0x47534350, 0x000000a8, 0x00000005, 0x00000008, + 0x00000080, 0x00000000, 0x0000000d, 0x00000003, 0x00000000, 0x00000001, 0x0000008e, 0x00000000, + 0x00000000, 0x00000003, 0x00000000, 0x00000e0e, 0x00000080, 0x00000001, 0x0000000d, 0x00000003, + 0x00000001, 0x00000001, 0x00000080, 0x00000002, 0x0000000d, 0x00000003, 0x00000002, 0x00000001, + 0x00000092, 0x00000000, 0x0000000e, 0x00000003, 0x00000003, 0x00000001, 0x545f5653, 0x46737365, + 0x6f746361, 0x4f460072, 0x5653004f, 0x736e495f, 0x54656469, 0x46737365, 0x6f746361, 0xabab0072, + 0x4e47534f, 0x00000078, 0x00000004, 0x00000008, 0x00000068, 0x00000000, 0x00000001, 0x00000003, + 0x00000000, 0x0000000f, 0x00000074, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000807, + 0x00000074, 0x00000001, 0x00000000, 0x00000003, 0x00000002, 0x0000000f, 0x00000074, 0x00000002, + 0x00000000, 0x00000003, 0x00000003, 0x00000807, 0x505f5653, 0x7469736f, 0x006e6f69, 0x004f4f46, + 0x58454853, 0x000001a0, 0x00040050, 0x00000068, 0x01001893, 0x01001095, 0x0100086a, 0x0300005f, + 0x0011b0e2, 0x00000000, 0x0200005f, 0x0001c072, 0x0400005f, 0x002190f2, 0x00000003, 0x00000000, + 0x0400005f, 0x00219072, 0x00000003, 0x00000001, 0x0400005f, 0x00219072, 0x00000003, 0x00000003, + 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065, 0x00102072, 0x00000001, 0x03000065, + 0x001020f2, 0x00000002, 0x03000065, 0x00102072, 0x00000003, 0x02000068, 0x00000001, 0x07000038, + 0x001000f2, 0x00000000, 0x0001c556, 0x00219e46, 0x00000001, 0x00000000, 0x09000032, 0x001000f2, + 0x00000000, 0x0001c006, 0x00219e46, 0x00000000, 0x00000000, 0x00100e46, 0x00000000, 0x09000032, + 0x001020f2, 0x00000000, 0x0001caa6, 0x00219e46, 
0x00000002, 0x00000000, 0x00100e46, 0x00000000, + 0x07000038, 0x00100072, 0x00000000, 0x0001c556, 0x00219246, 0x00000001, 0x00000001, 0x09000032, + 0x00100072, 0x00000000, 0x0001c006, 0x00219246, 0x00000000, 0x00000001, 0x00100246, 0x00000000, + 0x09000032, 0x00102072, 0x00000001, 0x0001caa6, 0x00219246, 0x00000002, 0x00000001, 0x00100246, + 0x00000000, 0x05000036, 0x00102072, 0x00000002, 0x0011b796, 0x00000000, 0x05000036, 0x00102082, + 0x00000002, 0x00004001, 0x00000000, 0x06000036, 0x00102072, 0x00000003, 0x00219246, 0x00000000, + 0x00000003, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE ds = {ds_code, sizeof(ds_code)}; + static const DWORD vs_code[] = + { + 0x43425844, 0xf6c8872b, 0x5f2d2170, 0xd8f05ee8, 0x1efbc879, 0x00000001, 0x00000250, 0x00000003, + 0x0000002c, 0x00000060, 0x000000d8, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, + 0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978, + 0x4e47534f, 0x00000070, 0x00000004, 0x00000008, 0x00000068, 0x00000000, 0x00000000, 0x00000003, + 0x00000000, 0x00000807, 0x00000068, 0x00000001, 0x00000000, 0x00000003, 0x00000001, 0x0000000f, + 0x00000068, 0x00000002, 0x00000000, 0x00000003, 0x00000002, 0x00000807, 0x0000006c, 0x00000000, + 0x00000000, 0x00000003, 0x00000003, 0x0000000f, 0x004f4f46, 0xabab0050, 0x58454853, 0x00000170, + 0x00010050, 0x0000005c, 0x0100086a, 0x04000060, 0x00101012, 0x00000000, 0x00000006, 0x03000065, + 0x00102072, 0x00000000, 0x03000065, 0x001020f2, 0x00000001, 0x03000065, 0x00102072, 0x00000002, + 0x03000065, 0x001020f2, 0x00000003, 0x02000068, 0x00000001, 0x08000036, 0x00102072, 0x00000000, + 0x00004002, 0x3e4ccccd, 0x3ecccccd, 0x3dcccccd, 0x00000000, 0x08000036, 0x001020f2, 0x00000001, + 0x00004002, 0x3f19999a, 0x3f4ccccd, 0x3e99999a, 0x3f000000, 0x08000036, 0x00102072, 0x00000002, + 0x00004002, 0x3f333333, 0x3f666666, 0x3f800000, 0x00000000, 0x0b00008c, 0x00100012, 0x00000000, + 0x00004001, 0x00000001, 0x00004001, 0x00000001, 
0x0010100a, 0x00000000, 0x00004001, 0x00000000, + 0x07000001, 0x00100042, 0x00000000, 0x0010100a, 0x00000000, 0x00004001, 0x00000002, 0x05000056, + 0x00100032, 0x00000000, 0x00100086, 0x00000000, 0x0f000032, 0x00102032, 0x00000003, 0x00100046, + 0x00000000, 0x00004002, 0x40000000, 0xc0000000, 0x00000000, 0x00000000, 0x00004002, 0xbf800000, + 0x3f800000, 0x00000000, 0x00000000, 0x08000036, 0x001020c2, 0x00000003, 0x00004002, 0x00000000, + 0x00000000, 0x00000000, 0x3f800000, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE vs = {vs_code, sizeof(vs_code)}; + static const DWORD ps_code[] = + { + 0x43425844, 0x5f0bb13f, 0x692a4798, 0x649648dc, 0xb8d68169, 0x00000001, 0x00000164, 0x00000003, + 0x0000002c, 0x000000ac, 0x000000e0, 0x4e475349, 0x00000078, 0x00000004, 0x00000008, 0x00000068, + 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f, 0x00000074, 0x00000000, 0x00000000, + 0x00000003, 0x00000001, 0x00000307, 0x00000074, 0x00000001, 0x00000000, 0x00000003, 0x00000002, + 0x0000010f, 0x00000074, 0x00000002, 0x00000000, 0x00000003, 0x00000003, 0x00000107, 0x505f5653, + 0x7469736f, 0x006e6f69, 0x004f4f46, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, + 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x0000000f, 0x745f7673, 0x65677261, 0xabab0074, + 0x58454853, 0x0000007c, 0x00000050, 0x0000001f, 0x0100086a, 0x03001062, 0x00101032, 0x00000001, + 0x03001062, 0x00101012, 0x00000002, 0x03001062, 0x00101012, 0x00000003, 0x03000065, 0x001020f2, + 0x00000000, 0x05000036, 0x00102032, 0x00000000, 0x00101046, 0x00000001, 0x05000036, 0x00102042, + 0x00000000, 0x0010100a, 0x00000002, 0x05000036, 0x00102082, 0x00000000, 0x0010100a, 0x00000003, + 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)}; + static const struct vec4 tess_factors = {1.0f, 1.0f, 1.0f, 1.0f}; + static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f}; + static const uint32_t u = 1; + + memset(&desc, 0, sizeof(desc)); + desc.no_root_signature = true; + if 
(!init_test_context(&context, &desc)) + return; + command_list = context.list; + queue = context.queue; + + context.root_signature = create_32bit_constants_root_signature(context.device, + 0, 5, D3D12_SHADER_VISIBILITY_HULL); + + init_pipeline_state_desc(&pso_desc, context.root_signature, + context.render_target_desc.Format, &vs, &ps, NULL); + pso_desc.HS = hs; + pso_desc.DS = ds; + pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH; + hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, + &IID_ID3D12PipelineState, (void **)&context.pipeline_state); + ok(hr == S_OK, "Failed to create state, hr %#x.\n", hr); + + ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); + + ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST); + ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); + ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &tess_factors.x, 0); + ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 1, &u, 4); + ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0); + + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + + todo + check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xcc996633, 0); + + destroy_test_context(&context); +} + START_TEST(d3d12) { parse_args(argc, argv); @@ -37748,4 +38009,5 @@ START_TEST(d3d12) run_test(test_readback_map_stability); 
run_test(test_vs_ps_relative_addressing); run_test(test_get_resource_tiling); + run_test(test_hull_shader_punned_array); }
From: Conor McCarthy cmccarthy@codeweavers.com
The relative-addressed case in shader_register_normalise_arrayed_addressing() leaves the control point id in idx[0], while for constant register indices it is placed in idx[1]. The latter case could be fixed instead, but placing the control point count in the outer dimension is more logical. --- libs/vkd3d-shader/ir.c | 7 +++++++ libs/vkd3d-shader/spirv.c | 20 ++++++++++---------- tests/d3d12.c | 1 - 3 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index d6ff42ab2..4ee4076a7 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -897,6 +897,13 @@ static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_sh reg->idx[id_idx + 1].rel_addr = NULL; reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset; reg->idx[id_idx].offset -= register_index; + if (id_idx) + { + /* idx[id_idx] now contains the array index, which must be moved below the control point id. */ + struct vkd3d_shader_register_index tmp = reg->idx[id_idx]; + reg->idx[id_idx] = reg->idx[id_idx - 1]; + reg->idx[id_idx - 1] = tmp; + } ++id_idx; } /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 067ba1b2d..23f249ee3 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4770,7 +4770,7 @@ static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *co assert(size_count <= ARRAY_SIZE(sizes)); memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); array_sizes = sizes; - sizes[size_count - 1] = max(sizes[size_count - 1], builtin->spirv_array_size); + sizes[0] = max(sizes[0], builtin->spirv_array_size);
id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class, builtin->component_type, builtin->component_count, array_sizes, size_count); @@ -4841,12 +4841,12 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
builtin = get_spirv_builtin_for_sysval(compiler, sysval);
- array_sizes[0] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->input_control_point_count); - array_sizes[1] = signature_element->register_count; - if (array_sizes[1] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) - && (!vsir_sysval_semantic_is_clip_cull(signature_element->sysval_semantic) || array_sizes[0])) + array_sizes[0] = signature_element->register_count; + array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->input_control_point_count); + if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) + && (!vsir_sysval_semantic_is_clip_cull(signature_element->sysval_semantic) || array_sizes[1])) { - array_sizes[1] = 0; + array_sizes[0] = 0; }
write_mask = signature_element->mask; @@ -5183,10 +5183,10 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, /* Don't use builtins for TCS -> TES varyings. See spirv_compiler_emit_input(). */ if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !is_patch_constant) sysval = VKD3D_SHADER_SV_NONE; - array_sizes[0] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->output_control_point_count); - array_sizes[1] = signature_element->register_count; - if (array_sizes[1] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic)) - array_sizes[1] = 0; + array_sizes[0] = signature_element->register_count; + array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->output_control_point_count); + if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic)) + array_sizes[0] = 0;
builtin = vkd3d_get_spirv_builtin(compiler, reg_type, sysval);
diff --git a/tests/d3d12.c b/tests/d3d12.c index 9154db138..6bdb5f7a4 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -37822,7 +37822,6 @@ static void test_hull_shader_punned_array(void) transition_resource_state(command_list, context.render_target, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
- todo check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xcc996633, 0);
destroy_test_context(&context);
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/tpf.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 587446556..7b4f25c06 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -919,6 +919,7 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins { struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; unsigned int i, register_idx, register_count; + const struct shader_signature *signature; enum vkd3d_shader_register_type type; struct sm4_index_range_array *ranges; unsigned int *io_masks; @@ -938,27 +939,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins case VKD3DSPR_INCONTROLPOINT: io_masks = priv->input_register_masks; ranges = &priv->input_index_ranges; + signature = &priv->p.shader_desc.input_signature; break; case VKD3DSPR_OUTPUT: if (sm4_parser_is_in_fork_or_join_phase(priv)) { io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; + signature = &priv->p.shader_desc.patch_constant_signature; } else { io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; + signature = &priv->p.shader_desc.output_signature; } break; case VKD3DSPR_COLOROUT: case VKD3DSPR_OUTCONTROLPOINT: io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; + signature = &priv->p.shader_desc.output_signature; break; case VKD3DSPR_PATCHCONST: io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; + signature = &priv->p.shader_desc.patch_constant_signature; break;
default: @@ -996,6 +1002,18 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins
for (i = 0; i < register_count; ++i) { + const struct signature_element *e = vsir_signature_find_element_for_reg(signature, register_idx + i, write_mask); + /* Index ranges should not contain non-arrayed sysvals. FXC tries to forbid this but it is buggy, + * and can emit a range containing a sysval if the sysval is not actually accessed. */ + if (e && e->sysval_semantic && register_count > 1 && !vsir_sysval_semantic_is_tess_factor(e->sysval_semantic) + && !vsir_sysval_semantic_is_clip_cull(e->sysval_semantic)) + { + WARN("Sysval %u included in an index range declaration.\n", e->sysval_semantic); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Index range base %u, count %u, mask %#x contains sysval %u.", + register_idx, register_count, write_mask, e->sysval_semantic); + return; + } if ((io_masks[register_idx + i] & write_mask) != write_mask) { WARN("No matching declaration for index range base %u, count %u, mask %#x.\n",
From: Conor McCarthy cmccarthy@codeweavers.com
We started with only one or two `if (result >= 0)` checks in vkd3d_shader_normalise(), but the chain has become excessive. Return early on failure instead. --- libs/vkd3d-shader/ir.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 4ee4076a7..3f8a6f192 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -1555,25 +1555,27 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, result = instruction_array_normalise_hull_shader_control_point_io(instructions, &parser->shader_desc.input_signature); } - if (result >= 0) - result = shader_normalise_io_registers(parser); + if (result < 0) + return result; + + if ((result = shader_normalise_io_registers(parser)) < 0) + return result;
- if (result >= 0) - result = instruction_array_normalise_flat_constants(parser); + if ((result = instruction_array_normalise_flat_constants(parser)) < 0) + return result;
- if (result >= 0) - remove_dead_code(parser); + remove_dead_code(parser);
- if (result >= 0) - result = normalise_combined_samplers(parser); + if ((result = normalise_combined_samplers(parser)) < 0) + return result;
- if (result >= 0 && TRACE_ON()) + if (TRACE_ON()) vkd3d_shader_trace(instructions, &parser->shader_version);
- if (result >= 0 && !parser->failed) - result = vsir_validate(parser); + if (!parser->failed && (result = vsir_validate(parser)) < 0) + return result;
- if (result >= 0 && parser->failed) + if (parser->failed) result = VKD3D_ERROR_INVALID_SHADER;
return result;
From: Conor McCarthy cmccarthy@codeweavers.com
Up to eight clip/cull values are supported, and the DXBC pattern of spreading these across two vector signature elements is a hacky solution, so converting these into an array is best for VSIR. SPIR-V requires these to be an array. --- libs/vkd3d-shader/ir.c | 354 ++++++++++++++++++++++- libs/vkd3d-shader/spirv.c | 7 +- libs/vkd3d-shader/vkd3d_shader_private.h | 2 + 3 files changed, 359 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 3f8a6f192..7637e8e50 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -1255,6 +1255,355 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse return VKD3D_OK; }
+struct clip_cull_normaliser_signature +{ + struct shader_signature *s; + unsigned int base_element_idx; + unsigned int offsets[MAX_REG_OUTPUT]; +}; + +struct clip_cull_normaliser +{ + struct vkd3d_shader_parser *parser; + + struct vkd3d_shader_location location; + bool has_clip_cull; + enum vkd3d_result result; + + struct vkd3d_shader_instruction *instructions; + size_t instruction_capacity; + size_t instruction_count; + + struct clip_cull_normaliser_signature input_signature; + + bool has_dcl_temps; + ptrdiff_t temp_ins_idx; + unsigned int temp_idx; + unsigned int temp_count; +}; + +static void shader_signature_transform_clip_cull(struct clip_cull_normaliser_signature *signature, + enum vkd3d_shader_sysval_semantic target_sysval, struct clip_cull_normaliser *normaliser) +{ + struct vkd3d_shader_parser *parser = normaliser->parser; + unsigned int i, j, base, array_size, reg_mask; + struct shader_signature *s = signature->s; + struct signature_element *e; + + /* Up to two vec4 clip/cull elements are allowed. Merge these into an array, + * and track the location and array offset of the second one. */ + for (i = 0, array_size = 0, reg_mask = 0, base = 0; i < s->element_count; ++i) + { + e = &s->elements[i]; + + if (e->sysval_semantic != target_sysval) + { + reg_mask |= ((1u << e->register_count) - 1) << e->register_index; + continue; + } + + if (!array_size) + base = i; + + signature->offsets[i] = array_size; + array_size += vsir_write_mask_component_count(e->mask); + } + + if (!array_size) + return; + + signature->base_element_idx = base; + e = &s->elements[base]; + + /* Compile a map of used registers. */ + for (i = base, j = ((1u << array_size) - 1) << base; i < MAX_REG_OUTPUT; ++i, j <<= 1) + { + if (!(reg_mask & j)) + break; + } + + /* Signature locations are not used for sysvals, so if it proves necessary, it should be + * ok to raise MAX_REG_OUTPUT to 40 and validate that non-sysval register indices are < 32. 
*/ + if (array_size > MAX_REG_OUTPUT - i) + { + FIXME("Too many registers; array size %u.\n", array_size); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_UNSUPPORTED_SIGNATURE, + "Clip or cull array size %u exceeds limit of 32 registers.", array_size); + return; + } + + e->register_index = i; + e->register_count = array_size; + e->mask = 1; + e->used_mask = 1; + + /* Delete the second clip/cull element. It will be remapped to the first by checking the offset table. */ + j = base + 1; + for (i = j; i < s->element_count; ++i) + { + e = &s->elements[i]; + + if (e->sysval_semantic != target_sysval) + s->elements[j++] = *e; + } + s->element_count = j; + + normaliser->has_clip_cull = true; +} + +static struct vkd3d_shader_instruction *clip_cull_normaliser_require_space(struct clip_cull_normaliser *normaliser, + size_t count) +{ + if (!vkd3d_array_reserve((void **)&normaliser->instructions, &normaliser->instruction_capacity, + normaliser->instruction_count + count, sizeof(*normaliser->instructions))) + { + ERR("Failed to allocate instructions.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + return &normaliser->instructions[normaliser->instruction_count]; +} + +static struct vkd3d_shader_dst_param *clip_cull_normaliser_emit_mov(struct clip_cull_normaliser *normaliser, + struct vkd3d_shader_src_param *src_param) +{ + struct vkd3d_shader_instruction *ins; + + if (!(ins = clip_cull_normaliser_require_space(normaliser, 1))) + return NULL; + vsir_instruction_init(ins, &normaliser->location, VKD3DSIH_MOV); + + ins->src = src_param; + ins->src_count = 1; + + if (!(ins->dst = shader_parser_get_dst_params(normaliser->parser, 1))) + { + ERR("Failed to allocate instructions.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + ins->dst_count = 1; + + ++normaliser->instruction_count; + return ins->dst; +} + +static void clip_cull_normaliser_copy_instruction(struct clip_cull_normaliser *normaliser, + struct 
vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_instruction *dst_ins; + + if (!(dst_ins = clip_cull_normaliser_require_space(normaliser, 1))) + return; + *dst_ins = *ins; + ++normaliser->instruction_count; +} + +static void shader_src_param_clip_cull_normalise(struct vkd3d_shader_src_param *src_param, + unsigned int write_mask, struct clip_cull_normaliser *normaliser) +{ + unsigned int i, element_idx, component_idx, array_offset, handled_mask, mask_bit; + struct vkd3d_shader_parser *parser = normaliser->parser; + const struct clip_cull_normaliser_signature *signature; + struct vkd3d_shader_register *reg = &src_param->reg; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *mov_src; + const struct signature_element *e; + const struct shader_signature *s; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + if (parser->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN) + return; + signature = &normaliser->input_signature; + break; + default: + return; + } + + s = signature->s; + element_idx = reg->idx[reg->idx_count - 1].offset; + if ((array_offset = signature->offsets[element_idx])) + { + element_idx = signature->base_element_idx; + reg->idx[reg->idx_count - 1].offset = element_idx; + } + e = &s->elements[element_idx]; + + if (e->sysval_semantic != VKD3D_SHADER_SV_CLIP_DISTANCE && e->sysval_semantic != VKD3D_SHADER_SV_CULL_DISTANCE) + return; + + if (!write_mask) + { + WARN("Unexpected zero write mask.\n"); + vkd3d_shader_parser_warning(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Write mask for a clip/cull load is zero."); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + /* Dynamic array addressing of clip/cull inputs is not supported. 
*/ + if (reg->idx_count >= ARRAY_SIZE(reg->idx)) + { + WARN("Unexpected index count %u.\n", reg->idx_count); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid register index count %u for a clip/cull load.", reg->idx_count); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + memmove(&reg->idx[1], &reg->idx[0], reg->idx_count * sizeof(reg->idx[0])); + memset(&reg->idx[0], 0, sizeof(reg->idx[0])); + ++reg->idx_count; + + if (vsir_write_mask_component_count(write_mask) == 1) + { + reg->idx[0].offset = array_offset + vsir_swizzle_get_component(src_param->swizzle, + vsir_write_mask_get_component_idx(write_mask)); + return; + } + + for (i = 0, handled_mask = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask & (1u << i))) + continue; + + /* For each component accessed by the write mask+swizzle, emit a MOV from the clip/cull array to a temp. + * The original instruction will still apply the swizzle, so the temp is a direct reflection of the source. */ + + component_idx = vsir_swizzle_get_component(src_param->swizzle, i); + mask_bit = 1u << component_idx; + + if (handled_mask & mask_bit) + continue; + handled_mask |= mask_bit; + + if (!(mov_src = shader_parser_get_src_params(parser, 1))) + { + ERR("Failed to allocate instructions.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + mov_src->reg = *reg; + mov_src->reg.idx[0].offset = array_offset + component_idx; + mov_src->swizzle = 0; + mov_src->modifiers = 0; + + if (!(dst_param = clip_cull_normaliser_emit_mov(normaliser, mov_src))) + return; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, reg->data_type, 1); + dst_param->reg.dimension = reg->dimension; + dst_param->reg.idx[0].offset = normaliser->temp_idx; + normaliser->temp_count = 1; + dst_param->write_mask = mask_bit; + dst_param->modifiers = 0; + dst_param->shift = 0; + } + + /* Substitute the temp for the vector clip/cull source. 
*/ + vsir_register_init(reg, VKD3DSPR_TEMP, reg->data_type, 1); + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = normaliser->temp_idx; +} + +static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_instruction *ins, + struct clip_cull_normaliser *normaliser) +{ + struct vkd3d_shader_instruction *dst_ins; + unsigned int i, write_mask; + + normaliser->location = ins->location; + + if (shader_instruction_is_dcl(ins)) + { + normaliser->has_dcl_temps |= ins->handler_idx == VKD3DSIH_DCL_TEMPS; + clip_cull_normaliser_copy_instruction(normaliser, ins); + return; + } + else if (!normaliser->has_dcl_temps) + { + if (!(dst_ins = clip_cull_normaliser_require_space(normaliser, 1))) + return; + normaliser->has_dcl_temps = true; + normaliser->temp_ins_idx = normaliser->instruction_count; + vsir_instruction_init(dst_ins, &normaliser->location, VKD3DSIH_DCL_TEMPS); + ++normaliser->instruction_count; + } + + for (i = 0, write_mask = 0; i < ins->dst_count; ++i) + if (ins->dst[i].reg.type != VKD3DSPR_NULL) + write_mask |= ins->dst[i].write_mask; + for (i = 0; i < ins->src_count; ++i) + shader_src_param_clip_cull_normalise(&ins->src[i], write_mask, normaliser); + + clip_cull_normaliser_copy_instruction(normaliser, ins); +} + +static enum vkd3d_result shader_normalise_clip_cull(struct vkd3d_shader_parser *parser) +{ + struct clip_cull_normaliser normaliser = {0}; + unsigned int i; + + normaliser.parser = parser; + normaliser.input_signature.s = &parser->shader_desc.input_signature; + normaliser.temp_ins_idx = -1; + normaliser.temp_idx = parser->shader_desc.temp_count; + + if (parser->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) + { + shader_signature_transform_clip_cull(&normaliser.input_signature, VKD3D_SHADER_SV_CLIP_DISTANCE, &normaliser); + shader_signature_transform_clip_cull(&normaliser.input_signature, VKD3D_SHADER_SV_CULL_DISTANCE, &normaliser); + } + + if (!normaliser.has_clip_cull) + return VKD3D_OK; + + if (parser->failed) + 
normaliser.result = VKD3D_ERROR_INVALID_SHADER; + + if (clip_cull_normaliser_require_space(&normaliser, parser->instructions.count)) + { + for (i = 0; i < parser->instructions.count; ++i) + shader_instruction_normalise_clip_cull_params(&parser->instructions.elements[i], &normaliser); + } + else + { + normaliser.result = VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (normaliser.result >= 0) + { + vkd3d_free(parser->instructions.elements); + parser->instructions.elements = normaliser.instructions; + parser->instructions.capacity = normaliser.instruction_capacity; + parser->instructions.count = normaliser.instruction_count; + + if (!normaliser.temp_count) + { + if (normaliser.temp_ins_idx >= 0) + normaliser.instructions[normaliser.temp_ins_idx].handler_idx = VKD3DSIH_NOP; + return VKD3D_OK; + } + + parser->shader_desc.temp_count += normaliser.temp_count; + for (i = 0; i < parser->instructions.count; ++i) + if (parser->instructions.elements[i].handler_idx == VKD3DSIH_DCL_TEMPS) + parser->instructions.elements[i].declaration.count = parser->shader_desc.temp_count; + + return VKD3D_OK; + } + else + { + if (normaliser.result == VKD3D_ERROR_OUT_OF_MEMORY) + { + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY, + "Out of memory allocating clip/cull normalization instructions.\n"); + } + vkd3d_free(normaliser.instructions); + return normaliser.result; + } +} + struct flat_constant_def { enum vkd3d_shader_d3dbc_constant_register set; @@ -1543,7 +1892,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, enum vkd3d_result result = VKD3D_OK;
if (parser->shader_desc.is_dxil) - return result; + return shader_normalise_clip_cull(parser);
if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL && (result = remap_output_signature(parser, compile_info)) < 0) @@ -1561,6 +1910,9 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, if ((result = shader_normalise_io_registers(parser)) < 0) return result;
+ if ((result = shader_normalise_clip_cull(parser)) < 0) + return result; + if ((result = instruction_array_normalise_flat_constants(parser)) < 0) return result;
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 23f249ee3..dc3a89382 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4844,7 +4844,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->input_control_point_count); if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) - && (!vsir_sysval_semantic_is_clip_cull(signature_element->sysval_semantic) || array_sizes[1])) + && (!vsir_sysval_semantic_is_clip_cull(sysval) || array_sizes[1])) { array_sizes[0] = 0; } @@ -9953,8 +9953,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, enum vkd3d_result result = VKD3D_OK; unsigned int i;
- if (parser->shader_desc.temp_count) - spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); if (parser->shader_desc.ssa_count) spirv_compiler_allocate_ssa_register_ids(compiler, parser->shader_desc.ssa_count);
@@ -9966,6 +9964,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) return result;
+ if (parser->shader_desc.temp_count) + spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); + instructions = parser->instructions; memset(&parser->instructions, 0, sizeof(parser->instructions));
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b85441aeb..2ed5c0057 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -217,6 +217,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, VKD3D_SHADER_ERROR_VSIR_INVALID_INSTRUCTION_NESTING = 9016, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, + VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY = 9018, + VKD3D_SHADER_ERROR_VSIR_UNSUPPORTED_SIGNATURE = 9019,
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, };
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/ir.c | 145 +++++++++++++++++++++++++++++++++++ libs/vkd3d-shader/spirv.c | 156 ++------------------------------------ 2 files changed, 152 insertions(+), 149 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 7637e8e50..8e7b8d4a2 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -1275,11 +1275,15 @@ struct clip_cull_normaliser size_t instruction_count;
struct clip_cull_normaliser_signature input_signature; + struct clip_cull_normaliser_signature output_signature; + struct clip_cull_normaliser_signature patch_constant_signature;
bool has_dcl_temps; ptrdiff_t temp_ins_idx; unsigned int temp_idx; unsigned int temp_count; + + enum vkd3d_shader_opcode phase; };
static void shader_signature_transform_clip_cull(struct clip_cull_normaliser_signature *signature, @@ -1418,6 +1422,14 @@ static void shader_src_param_clip_cull_normalise(struct vkd3d_shader_src_param * return; signature = &normaliser->input_signature; break; + case VKD3DSPR_OUTPUT: + if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL) + return; + signature = &normaliser->output_signature; + break; + case VKD3DSPR_PATCHCONST: + signature = &normaliser->patch_constant_signature; + break; default: return; } @@ -1506,6 +1518,118 @@ static void shader_src_param_clip_cull_normalise(struct vkd3d_shader_src_param * reg->idx[0].offset = normaliser->temp_idx; }
+static void shader_dst_param_clip_cull_normalise(struct vkd3d_shader_dst_param *dst_param, + struct clip_cull_normaliser *normaliser) + { + struct vkd3d_shader_parser *parser = normaliser->parser; + const struct clip_cull_normaliser_signature *signature; + unsigned int i, element_idx, write_mask, array_offset; + struct vkd3d_shader_register *reg = &dst_param->reg; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_dst_param *mov_dst; + const struct signature_element *e; + const struct shader_signature *s; + + if (!reg->idx_count) + return; + + switch (reg->type) + { + /* VKD3DSPR_INPUT must not occur in a dst param. */ + + case VKD3DSPR_OUTPUT: + if (normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE) + { + signature = &normaliser->patch_constant_signature; + } + else + { + if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL) + return; + signature = &normaliser->output_signature; + } + break; + + case VKD3DSPR_PATCHCONST: + signature = &normaliser->patch_constant_signature; + break; + + default: + return; + } + + s = signature->s; + element_idx = reg->idx[reg->idx_count - 1].offset; + if ((array_offset = signature->offsets[element_idx])) + { + element_idx = signature->base_element_idx; + reg->idx[reg->idx_count - 1].offset = element_idx; + } + e = &s->elements[element_idx]; + + if (e->sysval_semantic != VKD3D_SHADER_SV_CLIP_DISTANCE && e->sysval_semantic != VKD3D_SHADER_SV_CULL_DISTANCE) + return; + + write_mask = dst_param->write_mask; + + /* Dynamic array addressing of clip/cull outputs is not supported. */ + if (reg->idx_count >= ARRAY_SIZE(reg->idx)) + { + WARN("Unexpected index count %u.\n", reg->idx_count); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid register index count %u for a clip/cull store.", reg->idx_count); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + /* Move the indices up so the array index can be placed in idx[0]. 
*/ + memmove(&reg->idx[1], &reg->idx[0], reg->idx_count * sizeof(reg->idx[0])); + memset(&reg->idx[0], 0, sizeof(reg->idx[0])); + ++reg->idx_count; + + if (vsir_write_mask_component_count(write_mask) == 1) + { + reg->idx[0].offset = array_offset + vsir_write_mask_get_component_idx(write_mask); + return; + } + + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask & (1u << i))) + continue; + + /* For each component, emit a MOV from a temp to the clip/cull array. */ + + if (!(src_param = shader_parser_get_src_params(parser, 1))) + { + ERR("Failed to allocate instructions.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + src_param->swizzle = vkd3d_shader_create_swizzle(i, i, i, i); + src_param->modifiers = 0; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, reg->data_type, 1); + src_param->reg.dimension = reg->dimension; + src_param->reg.idx[0].offset = normaliser->temp_idx; + normaliser->temp_count = 1; + + if (!(mov_dst = clip_cull_normaliser_emit_mov(normaliser, src_param))) + return; + mov_dst->reg = *reg; + mov_dst->reg.idx[0].offset = array_offset + i; + mov_dst->write_mask = VKD3DSP_WRITEMASK_0; + mov_dst->modifiers = 0; + mov_dst->shift = 0; + } + + /* Substitute the temp for the vector clip/cull destination. If this is for a MOV instruction with + * clip/cull source, it results in a harmless no-op MOV, because shader_src_param_clip_cull_normalise() + * has already written the clip/cull source to the temp. */ + vsir_register_init(reg, VKD3DSPR_TEMP, reg->data_type, 1); + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = normaliser->temp_idx; +} + static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_instruction *ins, struct clip_cull_normaliser *normaliser) { @@ -1530,6 +1654,14 @@ static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_in ++normaliser->instruction_count; }
+ if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE + || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + { + normaliser->phase = ins->handler_idx; + clip_cull_normaliser_copy_instruction(normaliser, ins); + return; + } + for (i = 0, write_mask = 0; i < ins->dst_count; ++i) if (ins->dst[i].reg.type != VKD3DSPR_NULL) write_mask |= ins->dst[i].write_mask; @@ -1537,6 +1669,9 @@ static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_in shader_src_param_clip_cull_normalise(&ins->src[i], write_mask, normaliser);
clip_cull_normaliser_copy_instruction(normaliser, ins); + + for (i = 0; i < ins->dst_count; ++i) + shader_dst_param_clip_cull_normalise(&ins->dst[i], normaliser); }
static enum vkd3d_result shader_normalise_clip_cull(struct vkd3d_shader_parser *parser) @@ -1546,14 +1681,24 @@ static enum vkd3d_result shader_normalise_clip_cull(struct vkd3d_shader_parser *
normaliser.parser = parser; normaliser.input_signature.s = &parser->shader_desc.input_signature; + normaliser.output_signature.s = &parser->shader_desc.output_signature; + normaliser.patch_constant_signature.s = &parser->shader_desc.patch_constant_signature; normaliser.temp_ins_idx = -1; normaliser.temp_idx = parser->shader_desc.temp_count; + normaliser.phase = VKD3DSIH_INVALID;
if (parser->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) { shader_signature_transform_clip_cull(&normaliser.input_signature, VKD3D_SHADER_SV_CLIP_DISTANCE, &normaliser); shader_signature_transform_clip_cull(&normaliser.input_signature, VKD3D_SHADER_SV_CULL_DISTANCE, &normaliser); } + if (parser->shader_version.type != VKD3D_SHADER_TYPE_HULL) + { + shader_signature_transform_clip_cull(&normaliser.output_signature, VKD3D_SHADER_SV_CLIP_DISTANCE, &normaliser); + shader_signature_transform_clip_cull(&normaliser.output_signature, VKD3D_SHADER_SV_CULL_DISTANCE, &normaliser); + } + shader_signature_transform_clip_cull(&normaliser.patch_constant_signature, VKD3D_SHADER_SV_CLIP_DISTANCE, &normaliser); + shader_signature_transform_clip_cull(&normaliser.patch_constant_signature, VKD3D_SHADER_SV_CULL_DISTANCE, &normaliser);
if (!normaliser.has_clip_cull) return VKD3D_OK; diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index dc3a89382..a6153ed6d 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -2390,7 +2390,6 @@ struct spirv_compiler { uint32_t id; enum vkd3d_shader_component_type component_type; - uint32_t array_element_mask; } *output_info; uint32_t private_output_variable[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ uint32_t private_output_variable_write_mask[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ @@ -5018,88 +5017,6 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; }
-static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) -{ - unsigned int write_mask; - - if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) - { - FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); - return; - } - - write_mask = e->mask >> vsir_write_mask_get_component_idx(e->mask); - *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); -} - -/* Emits arrayed SPIR-V built-in variables. */ -static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) -{ - const struct shader_signature *output_signature = &compiler->output_signature; - uint32_t clip_distance_mask = 0, clip_distance_id = 0; - uint32_t cull_distance_mask = 0, cull_distance_id = 0; - const struct vkd3d_spirv_builtin *builtin; - unsigned int i, count; - - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *e = &output_signature->elements[i]; - - switch (e->sysval_semantic) - { - case VKD3D_SHADER_SV_CLIP_DISTANCE: - calculate_clip_or_cull_distance_mask(e, &clip_distance_mask); - break; - - case VKD3D_SHADER_SV_CULL_DISTANCE: - calculate_clip_or_cull_distance_mask(e, &cull_distance_mask); - break; - - default: - break; - } - } - - if (clip_distance_mask) - { - count = vkd3d_popcount(clip_distance_mask); - builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SHADER_SV_CLIP_DISTANCE); - clip_distance_id = spirv_compiler_emit_builtin_variable(compiler, - builtin, SpvStorageClassOutput, count); - } - - if (cull_distance_mask) - { - count = vkd3d_popcount(cull_distance_mask); - builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SHADER_SV_CULL_DISTANCE); - cull_distance_id = spirv_compiler_emit_builtin_variable(compiler, - builtin, SpvStorageClassOutput, count); - } - - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *e = &output_signature->elements[i]; - - switch 
(e->sysval_semantic) - { - case VKD3D_SHADER_SV_CLIP_DISTANCE: - compiler->output_info[i].id = clip_distance_id; - compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; - compiler->output_info[i].array_element_mask = clip_distance_mask; - break; - - case VKD3D_SHADER_SV_CULL_DISTANCE: - compiler->output_info[i].id = cull_distance_id; - compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; - compiler->output_info[i].array_element_mask = cull_distance_mask; - break; - - default: - break; - } - } -} - static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst) { @@ -5185,7 +5102,8 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, sysval = VKD3D_SHADER_SV_NONE; array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->output_control_point_count); - if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic)) + if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) + && (!vsir_sysval_semantic_is_clip_cull(sysval) || array_sizes[1])) array_sizes[0] = 0;
builtin = vkd3d_get_spirv_builtin(compiler, reg_type, sysval); @@ -5211,8 +5129,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, use_private_variable = true;
if (!is_patch_constant - && (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE - || (compiler->output_info[element_idx].id && compiler->output_info[element_idx].array_element_mask))) + && get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE) { use_private_variable = true; } @@ -5312,51 +5229,18 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, } }
-static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *compiler, - const struct signature_element *e) -{ - enum vkd3d_shader_sysval_semantic sysval = e->sysval_semantic; - const struct vkd3d_spirv_builtin *builtin; - - builtin = get_spirv_builtin_for_sysval(compiler, sysval); - - switch (sysval) - { - case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: - case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: - return builtin->member_idx; - default: - return e->semantic_index; - } -} - static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, const struct shader_signature *signature, const struct signature_element *output, const struct vkd3d_shader_output_info *output_info, uint32_t output_index_id, uint32_t val_id, uint32_t write_mask) { - uint32_t dst_write_mask, use_mask, uninit_mask, swizzle, mask; + uint32_t dst_write_mask, use_mask, uninit_mask, swizzle, type_id, zero_id, ptr_type_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; - const struct signature_element *element; - unsigned int i, index, array_idx; + unsigned int index; uint32_t output_id;
dst_write_mask = output->mask; use_mask = output->used_mask; - if (!output->sysval_semantic) - { - for (i = 0; i < signature->element_count; ++i) - { - element = &signature->elements[i]; - if (element->register_index != output->register_index) - continue; - if (element->sysval_semantic) - continue; - dst_write_mask |= element->mask; - use_mask |= element->used_mask; - } - } index = vsir_write_mask_get_component_idx(output->mask); dst_write_mask >>= index; use_mask >>= index; @@ -5398,31 +5282,8 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi output_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, output_id, output_index_id); }
- if (!output_info->array_element_mask) - { - spirv_compiler_emit_store(compiler, - output_id, dst_write_mask, output_info->component_type, SpvStorageClassOutput, write_mask, val_id); - return; - } - - type_id = vkd3d_spirv_get_type_id(builder, output_info->component_type, 1); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); - mask = output_info->array_element_mask; - array_idx = spirv_compiler_get_output_array_index(compiler, output); - mask &= (1u << (array_idx * VKD3D_VEC4_SIZE)) - 1; - for (i = 0, index = vkd3d_popcount(mask); i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask & (VKD3DSP_WRITEMASK_0 << i))) - continue; - - chain_id = vkd3d_spirv_build_op_access_chain1(builder, - ptr_type_id, output_id, spirv_compiler_get_constant_uint(compiler, index)); - object_id = spirv_compiler_emit_swizzle(compiler, val_id, write_mask, - output_info->component_type, VKD3D_SHADER_NO_SWIZZLE, VKD3DSP_WRITEMASK_0 << i); - spirv_compiler_emit_store(compiler, chain_id, VKD3DSP_WRITEMASK_0, - output_info->component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_0 << i, object_id); - ++index; - } + spirv_compiler_emit_store(compiler, + output_id, dst_write_mask, output_info->component_type, SpvStorageClassOutput, write_mask, val_id); }
static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler *compiler) @@ -9981,9 +9842,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, compiler->input_control_point_count = shader_desc->input_control_point_count; compiler->output_control_point_count = shader_desc->output_control_point_count;
- if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_shader_signature_outputs(compiler); - spirv_compiler_emit_io_declarations(compiler);
for (i = 0; i < instructions.count && result >= 0; ++i)
From: Conor McCarthy cmccarthy@codeweavers.com
--- tests/d3d12.c | 183 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 116 insertions(+), 67 deletions(-)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 6bdb5f7a4..72b1c0635 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -31075,7 +31075,7 @@ static void test_clip_distance(void) ID3D12CommandQueue *queue; ID3D12PipelineState *pso; ID3D12Device *device; - unsigned int i; + unsigned int i, j; D3D12_BOX box; HRESULT hr;
@@ -31130,51 +31130,53 @@ static void test_clip_distance(void) #if 0 bool use_constant; float clip_distance0; - float clip_distance1; + float4 clip_distance1;
struct input { float4 position : POSITION; float distance0 : CLIP_DISTANCE0; - float distance1 : CLIP_DISTANCE1; + float4 distance1 : CLIP_DISTANCE1; };
struct vertex { float4 position : SV_POSITION; float user_clip : CLIP_DISTANCE; - float2 clip : SV_ClipDistance; + float clip0 : SV_ClipDistance0; + float4 clip1 : SV_ClipDistance1; };
void main(input vin, out vertex vertex) { vertex.position = vin.position; vertex.user_clip = vin.distance0; - vertex.clip.x = vin.distance0; + vertex.clip0 = vin.distance0; if (use_constant) - vertex.clip.x = clip_distance0; - vertex.clip.y = vin.distance1; + vertex.clip0 = clip_distance0; + vertex.clip1 = vin.distance1.xzyw; if (use_constant) - vertex.clip.y = clip_distance1; + vertex.clip1 = clip_distance1; } #endif - 0x43425844, 0xef5cc236, 0xe2fbfa69, 0x560b6591, 0x23037999, 0x00000001, 0x00000214, 0x00000003, - 0x0000002c, 0x0000009c, 0x00000120, 0x4e475349, 0x00000068, 0x00000003, 0x00000008, 0x00000050, + 0x43425844, 0x85af9c9d, 0xa40fe352, 0x1fdcce87, 0x16f969e1, 0x00000001, 0x00000240, 0x00000003, + 0x0000002c, 0x0000009c, 0x00000138, 0x4e475349, 0x00000068, 0x00000003, 0x00000008, 0x00000050, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000f0f, 0x00000059, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000101, 0x00000059, 0x00000001, 0x00000000, 0x00000003, 0x00000002, - 0x00000101, 0x49534f50, 0x4e4f4954, 0x494c4300, 0x49445f50, 0x4e415453, 0xab004543, 0x4e47534f, - 0x0000007c, 0x00000003, 0x00000008, 0x00000050, 0x00000000, 0x00000001, 0x00000003, 0x00000000, - 0x0000000f, 0x0000005c, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000e01, 0x0000006a, - 0x00000000, 0x00000002, 0x00000003, 0x00000002, 0x00000c03, 0x505f5653, 0x5449534f, 0x004e4f49, - 0x50494c43, 0x5349445f, 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, 0x636e6174, 0xabab0065, - 0x52444853, 0x000000ec, 0x00010040, 0x0000003b, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, - 0x0300005f, 0x001010f2, 0x00000000, 0x0300005f, 0x00101012, 0x00000001, 0x0300005f, 0x00101012, - 0x00000002, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065, 0x00102012, 0x00000001, - 0x04000067, 0x00102032, 0x00000002, 0x00000002, 0x05000036, 0x001020f2, 0x00000000, 0x00101e46, - 0x00000000, 0x05000036, 0x00102012, 0x00000001, 0x0010100a, 0x00000001, 0x0b000037, 0x00102012, - 0x00000002, 
0x0020800a, 0x00000000, 0x00000000, 0x0020801a, 0x00000000, 0x00000000, 0x0010100a, - 0x00000001, 0x0b000037, 0x00102022, 0x00000002, 0x0020800a, 0x00000000, 0x00000000, 0x0020802a, - 0x00000000, 0x00000000, 0x0010100a, 0x00000002, 0x0100003e, + 0x00000f0f, 0x49534f50, 0x4e4f4954, 0x494c4300, 0x49445f50, 0x4e415453, 0xab004543, 0x4e47534f, + 0x00000094, 0x00000004, 0x00000008, 0x00000068, 0x00000000, 0x00000001, 0x00000003, 0x00000000, + 0x0000000f, 0x00000074, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000e01, 0x00000082, + 0x00000000, 0x00000002, 0x00000003, 0x00000002, 0x00000e01, 0x00000082, 0x00000001, 0x00000002, + 0x00000003, 0x00000003, 0x0000000f, 0x505f5653, 0x5449534f, 0x004e4f49, 0x50494c43, 0x5349445f, + 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, 0x636e6174, 0xabab0065, 0x58454853, 0x00000100, + 0x00010050, 0x00000040, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0300005f, + 0x001010f2, 0x00000000, 0x0300005f, 0x00101012, 0x00000001, 0x0300005f, 0x001010f2, 0x00000002, + 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065, 0x00102012, 0x00000001, 0x04000067, + 0x00102012, 0x00000002, 0x00000002, 0x04000067, 0x001020f2, 0x00000003, 0x00000002, 0x05000036, + 0x001020f2, 0x00000000, 0x00101e46, 0x00000000, 0x05000036, 0x00102012, 0x00000001, 0x0010100a, + 0x00000001, 0x0b000037, 0x00102012, 0x00000002, 0x0020800a, 0x00000000, 0x00000000, 0x0020801a, + 0x00000000, 0x00000000, 0x0010100a, 0x00000001, 0x0b000037, 0x001020f2, 0x00000003, 0x00208006, + 0x00000000, 0x00000000, 0x00208e46, 0x00000000, 0x00000001, 0x00101d86, 0x00000002, 0x0100003e, }; static const D3D12_SHADER_BYTECODE vs_multiple = {vs_multiple_code, sizeof(vs_multiple_code)}; #if 0 @@ -31410,6 +31412,12 @@ static void test_clip_distance(void) {"CLIP_DISTANCE", 0, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, {"CLIP_DISTANCE", 1, DXGI_FORMAT_R32_FLOAT, 1, 4, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, }; + static const 
D3D12_INPUT_ELEMENT_DESC layout_desc_multiple[] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"CLIP_DISTANCE", 0, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"CLIP_DISTANCE", 1, DXGI_FORMAT_R32G32B32A32_FLOAT, 1, 4, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + }; static const struct vec4 quad[] = { {-1.0f, -1.0f}, @@ -31429,6 +31437,18 @@ static void test_clip_distance(void) {1.0f, 1.0f}, {1.0f, 1.0f}, }; + struct + { + float clip_distance0; + struct vec4 clip_distance1; + } + vertices_multiple[] = + { + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + }; static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f}; struct { @@ -31437,6 +31457,12 @@ static void test_clip_distance(void) float clip_distance1; float tessellation_factor; } cb_data; + struct + { + bool use_constant; + float clip_distance0; + struct vec4 clip_distance1; + } cb_data_multiple;
memset(&desc, 0, sizeof(desc)); desc.rt_width = 640; @@ -31628,16 +31654,23 @@ static void test_clip_distance(void) memset(&pso_desc.DS, 0, sizeof(pso_desc.DS)); memset(&pso_desc.GS, 0, sizeof(pso_desc.GS)); pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pso_desc.InputLayout.pInputElementDescs = layout_desc_multiple; + pso_desc.InputLayout.NumElements = ARRAY_SIZE(layout_desc_multiple); hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); ok(hr == S_OK, "Failed to create pipeline state, hr %#x.\n", hr);
- cb_data.use_constant = false; - update_buffer_data(vs_cb, 0, sizeof(cb_data), &cb_data); + memset(&cb_data_multiple, 0, sizeof(cb_data_multiple)); + ID3D12Resource_Release(vs_cb); + vs_cb = create_upload_buffer(device, sizeof(cb_data_multiple), &cb_data_multiple);
- for (i = 0; i < ARRAY_SIZE(vertices); ++i) - vertices[i].clip_distance0 = 1.0f; - update_buffer_data(vb[1], 0, sizeof(vertices), vertices); + ID3D12Resource_Release(vb[1]); + vb[1] = create_upload_buffer(device, sizeof(vertices_multiple), vertices_multiple); + vbv[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vb[1]); + vbv[1].StrideInBytes = sizeof(*vertices_multiple); + vbv[1].SizeInBytes = sizeof(vertices_multiple); + + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, @@ -31663,49 +31696,65 @@ static void test_clip_distance(void) transition_resource_state(command_list, context.render_target, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
- for (i = 0; i < ARRAY_SIZE(vertices); ++i) + for (i = 0; i < 4; ++i) { - vertices[i].clip_distance0 = i < 2 ? 1.0f : -1.0f; - vertices[i].clip_distance1 = i % 2 ? 1.0f : -1.0f; - } - update_buffer_data(vb[1], 0, sizeof(vertices), vertices); - ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); - ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, - ID3D12Resource_GetGPUVirtualAddress(vs_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, - ID3D12Resource_GetGPUVirtualAddress(tess_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, - ID3D12Resource_GetGPUVirtualAddress(tess_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, - ID3D12Resource_GetGPUVirtualAddress(gs_cb)); - ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); - ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); - ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); - ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); - ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); - ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); - transition_resource_state(command_list, context.render_target, - D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + vkd3d_test_push_context("Component %u", i);
- get_resource_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); - set_box(&box, 0, 0, 0, 320, 240, 1); - check_readback_data_uint(&rb.rb, &box, 0xff00ff00, 1); - set_box(&box, 0, 240, 0, 320, 480, 1); - check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); - set_box(&box, 320, 0, 0, 640, 480, 1); - check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); - release_resource_readback(&rb); + for (j = 0; j < ARRAY_SIZE(vertices_multiple); ++j) + { + float clip1 = j % 2 ? 1.0f : -1.0f; + vertices_multiple[j].clip_distance0 = j < 2 ? 1.0f : -1.0f; + vertices_multiple[j].clip_distance1.x = 1.0f; + vertices_multiple[j].clip_distance1.y = 1.0f; + vertices_multiple[j].clip_distance1.z = 1.0f; + vertices_multiple[j].clip_distance1.w = 1.0f; + switch (i) + { + case 0: vertices_multiple[j].clip_distance1.x = clip1; break; + case 1: vertices_multiple[j].clip_distance1.y = clip1; break; + case 2: vertices_multiple[j].clip_distance1.z = clip1; break; + case 3: vertices_multiple[j].clip_distance1.w = clip1; break; + } + } + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); + ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, + ID3D12Resource_GetGPUVirtualAddress(vs_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, + ID3D12Resource_GetGPUVirtualAddress(gs_cb)); + ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + 
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); + ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); + ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); + ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
- reset_command_list(command_list, context.allocator); - transition_resource_state(command_list, context.render_target, - D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + get_resource_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); + set_box(&box, 0, 0, 0, 320, 240, 1); + check_readback_data_uint(&rb.rb, &box, 0xff00ff00, 1); + set_box(&box, 0, 240, 0, 320, 480, 1); + check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); + set_box(&box, 320, 0, 0, 640, 480, 1); + check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); + release_resource_readback(&rb);
- cb_data.use_constant = true; - cb_data.clip_distance0 = 0.0f; - cb_data.clip_distance1 = 0.0f; - update_buffer_data(vs_cb, 0, sizeof(cb_data), &cb_data); + reset_command_list(command_list, context.allocator); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + + vkd3d_test_pop_context(); + } + + cb_data_multiple.use_constant = true; + update_buffer_data(vs_cb, 0, sizeof(cb_data_multiple), &cb_data_multiple); ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0,
From: Conor McCarthy cmccarthy@codeweavers.com
--- tests/d3d12.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 72b1c0635..8b00801ff 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -31179,6 +31179,36 @@ static void test_clip_distance(void) 0x00000000, 0x00000000, 0x00208e46, 0x00000000, 0x00000001, 0x00101d86, 0x00000002, 0x0100003e, }; static const D3D12_SHADER_BYTECODE vs_multiple = {vs_multiple_code, sizeof(vs_multiple_code)}; + static const DWORD ps_multiple_code[] = + { +#if 0 + struct vertex + { + float4 position : SV_POSITION; + float user_clip : CLIP_DISTANCE; + float clip0 : SV_ClipDistance0; + float4 clip1 : SV_ClipDistance1; + }; + + float4 main(vertex input) : SV_Target + { + return float4(input.clip0, input.clip1.xyz); + } +#endif + 0x43425844, 0x672b84d7, 0x92f9cfdd, 0x87eece2d, 0xfb11dafc, 0x00000001, 0x00000168, 0x00000003, + 0x0000002c, 0x000000c8, 0x000000fc, 0x4e475349, 0x00000094, 0x00000004, 0x00000008, 0x00000068, + 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f, 0x00000074, 0x00000000, 0x00000000, + 0x00000003, 0x00000001, 0x00000001, 0x00000082, 0x00000000, 0x00000002, 0x00000003, 0x00000002, + 0x00000101, 0x00000082, 0x00000001, 0x00000002, 0x00000003, 0x00000003, 0x0000070f, 0x505f5653, + 0x5449534f, 0x004e4f49, 0x50494c43, 0x5349445f, 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, + 0x636e6174, 0xabab0065, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, + 0x00000000, 0x00000003, 0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, + 0x00000064, 0x00000050, 0x00000019, 0x0100086a, 0x04001064, 0x00101012, 0x00000002, 0x00000002, + 0x04001064, 0x00101072, 0x00000003, 0x00000002, 0x03000065, 0x001020f2, 0x00000000, 0x05000036, + 0x00102012, 0x00000000, 0x0010100a, 0x00000002, 0x05000036, 0x001020e2, 0x00000000, 0x00101906, + 0x00000003, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE ps_multiple = {ps_multiple_code, sizeof(ps_multiple_code)}; #if 0 bool use_constant; float clip_distance0; @@ -31776,6 +31806,50 @@ static void 
test_clip_distance(void) D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0);
+ reset_command_list(command_list, context.allocator); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + + ID3D12PipelineState_Release(pso); + + pso_desc.PS = ps_multiple; + hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc, + &IID_ID3D12PipelineState, (void **)&pso); + ok(hr == S_OK, "Failed to create pipeline state, hr %#x.\n", hr); + + for (i = 0; i < ARRAY_SIZE(vertices_multiple); ++i) + { + vertices_multiple[i].clip_distance0 = 0.2f; + vertices_multiple[i].clip_distance1.x = 0.4f; + vertices_multiple[i].clip_distance1.y = 0.6f; + vertices_multiple[i].clip_distance1.z = 0.8f; + vertices_multiple[i].clip_distance1.w = 1.0f; + } + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); + + cb_data_multiple.use_constant = false; + update_buffer_data(vs_cb, 0, sizeof(cb_data_multiple), &cb_data_multiple); + ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, + ID3D12Resource_GetGPUVirtualAddress(vs_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, + ID3D12Resource_GetGPUVirtualAddress(gs_cb)); + ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); + 
ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); + ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); + ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + check_sub_resource_uint(context.render_target, 0, queue, command_list, 0x99cc6633, 0); + ID3D12PipelineState_Release(pso); for (i = 0; i < ARRAY_SIZE(vb); ++i) ID3D12Resource_Release(vb[i]);