From: Francisco Casas fcasas@codeweavers.com
If a hlsl_ir_load loads a variable whose components are stored from different instructions, copy propagation doesn't replace it.
But if all these instructions are constants (which is currently the case for value constructors), the load could be replaced with a constant value, which is what some other instructions expect, e.g. texel_offsets when using aoffimmi modifiers.
For instance, this shader:
```
sampler s;
Texture2D t;

float4 main() : sv_target
{
    return t.Gather(s, float2(0.6, 0.6), int2(0, 0));
}
```
results in the following IR before applying the patch:
```
float | 6.00000024e-01
float | 6.00000024e-01
uint | 0
| = (<constructor-2>[@4].x @2)
uint | 1
| = (<constructor-2>[@6].x @3)
float2 | <constructor-2>
int | 0
int | 0
uint | 0
| = (<constructor-5>[@11].x @9)
uint | 1
| = (<constructor-5>[@13].x @10)
int2 | <constructor-5>
float4 | gather_red(resource = t, sampler = s, coords = @8, offset = @15)
| return
| = (<output-sv_target0> @16)
```
and this IR afterwards:
```
float2 | {6.00000024e-01 6.00000024e-01 }
int2 | {0 0 }
float4 | gather_red(resource = t, sampler = s, coords = @2, offset = @3)
| return
| = (<output-sv_target0> @4)
```
---
 libs/vkd3d-shader/hlsl_codegen.c | 85 ++++++++++++++++++++++
 tests/hlsl-initializer-objects.shader_test | 8 +-
 tests/object-references.shader_test | 6 +-
 tests/sampler-offset.shader_test | 12 +--
 tests/shader_runner_d3d12.c | 2 +-
 tests/texture-load-offset.shader_test | 24 +++---
 6 files changed, 111 insertions(+), 26 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index ebc1822b..b05109b0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -501,6 +501,52 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v return false; }
+/* + * Copy propagation. The basic idea is to recognize instruction sequences of the + * form: + * + * 2: <any instruction> + * 3: v = @2 + * 4: load(v) + * + * and replace the load (@4) with the original instruction (@2). + * This works for multiple components, even if they're written using separate + * store instructions, as long as the rhs is the same in every case. This basic + * detection is implemented by copy_propagation_replace_with_single_instr(). + * + * We use the same infrastructure to implement a more specialized + * transformation. We recognize sequences of the form: + * + * 2: 123 + * 3: var.x = @2 + * 4: 345 + * 5: var.y = @4 + * 6: load(var.xy) + * + * where the load (@6) originates from different sources but that are constant, + * and transform it into a single constant vector. This latter pass is done + * by copy_propagation_replace_with_constant_vector(). + * + * This is a specialized form of vectorization, and begs the question: why does + * the load need to be involved? Can we just vectorize the stores into a single + * instruction, and then use "normal" copy-prop to convert that into a single + * vector? + * + * In general, the answer is yes, but there is a special case which necessitates + * the use of this transformation: non-uniform control flow. Copy-prop can act + * across some control flow, and in cases like the following: + * + * 2: 123 + * 3: var.x = @2 + * 4: if (...) + * 5: 456 + * 6: var.y = @5 + * 7: load(var.xy) + * + * we can copy-prop the load (@7) into a constant vector {123, 456}, but we + * cannot easily vectorize the stores @3 and @6. + */ + enum copy_propagation_value_state { VALUE_STATE_NOT_WRITTEN = 0, @@ -730,6 +776,42 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, return true; }
+static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, + const struct copy_propagation_state *state, struct hlsl_ir_load *load) +{ + const struct hlsl_ir_var *var = load->src.var; + union hlsl_constant_value values[4] = {0}; + struct hlsl_ir_node *instr = &load->node; + struct hlsl_ir_constant *cons; + unsigned int start, count, i; + + if (!hlsl_component_index_range_from_deref(ctx, &load->src, &start, &count)) + return false; + + for (i = 0; i < count; ++i) + { + struct copy_propagation_value *value = copy_propagation_get_value(state, var, start + i); + + if (!value || value->node->type != HLSL_IR_CONSTANT) + return false; + + values[i] = hlsl_ir_constant(value->node)->value[value->component]; + } + + if (!(cons = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) + return false; + cons->value[0] = values[0]; + cons->value[1] = values[1]; + cons->value[2] = values[2]; + cons->value[3] = values[3]; + list_add_before(&instr->entry, &cons->node.entry); + + TRACE("Load from %s[%u-%u] turned into a constant %p.\n", var->name, start, start + count, cons); + + hlsl_replace_node(instr, &cons->node); + return true; +} + static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, struct copy_propagation_state *state) { @@ -750,6 +832,9 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, return false; }
+ if (copy_propagation_replace_with_constant_vector(ctx, state, load)) + return true; + if (copy_propagation_replace_with_single_instr(ctx, state, load)) return true;
diff --git a/tests/hlsl-initializer-objects.shader_test b/tests/hlsl-initializer-objects.shader_test index d40ede46..d9c0bc91 100644 --- a/tests/hlsl-initializer-objects.shader_test +++ b/tests/hlsl-initializer-objects.shader_test @@ -29,7 +29,7 @@ draw quad probe all rgba (0.2, 0.2, 0.2, 0.1)
-[pixel shader todo] +[pixel shader] Texture2D tex;
struct foo @@ -48,11 +48,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (31.1, 41.1, 51.1, 61.1) 1 +draw quad +probe all rgba (31.1, 41.1, 51.1, 61.1) 1
-[pixel shader todo] +[pixel shader] Texture2D tex1; Texture2D tex2;
diff --git a/tests/object-references.shader_test b/tests/object-references.shader_test index 12f745e6..ba9b1235 100644 --- a/tests/object-references.shader_test +++ b/tests/object-references.shader_test @@ -132,7 +132,7 @@ float4 main() : sv_target }
-[pixel shader todo] +[pixel shader] Texture2D tex; uniform float f;
@@ -153,5 +153,5 @@ float4 main() : sv_target
[test] uniform 0 float 10.0 -todo draw quad -todo probe (0, 0) rgba (11.0, 12.0, 13.0, 11.0) +draw quad +probe (0, 0) rgba (11.0, 12.0, 13.0, 11.0) diff --git a/tests/sampler-offset.shader_test b/tests/sampler-offset.shader_test index 2aa8f9b3..6f8357df 100644 --- a/tests/sampler-offset.shader_test +++ b/tests/sampler-offset.shader_test @@ -12,7 +12,7 @@ size (3, 3) 0.0 0.2 0.0 0.4 0.1 0.2 0.5 0.0 0.2 0.2 0.0 0.4
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -22,11 +22,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.1, 0.2, 0.5, 0.0)
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -36,11 +36,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.2, 0.2, 0.0, 0.4)
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -50,5 +50,5 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.0, 0.2, 0.0, 0.4) diff --git a/tests/shader_runner_d3d12.c b/tests/shader_runner_d3d12.c index bb4d9c5a..bd94b4c9 100644 --- a/tests/shader_runner_d3d12.c +++ b/tests/shader_runner_d3d12.c @@ -167,7 +167,7 @@ static ID3D12RootSignature *d3d12_runner_create_root_signature(struct d3d12_shad ID3D12GraphicsCommandList *command_list, unsigned int *uniform_index) { D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {0}; - D3D12_ROOT_PARAMETER root_params[3], *root_param; + D3D12_ROOT_PARAMETER root_params[4], *root_param; D3D12_STATIC_SAMPLER_DESC static_samplers[1]; ID3D12RootSignature *root_signature; HRESULT hr; diff --git a/tests/texture-load-offset.shader_test b/tests/texture-load-offset.shader_test index 6d732190..52b6a5f9 100644 --- a/tests/texture-load-offset.shader_test +++ b/tests/texture-load-offset.shader_test @@ -8,7 +8,7 @@ size (3, 3) 0 2 0 1 1 2 0 1 2 2 0 1
-[pixel shader todo] +[pixel shader] Texture2D t;
float4 main(float4 pos : sv_position) : sv_target @@ -18,14 +18,14 @@ float4 main(float4 pos : sv_position) : sv_target
[test] -todo draw quad -todo probe (0, 0) rgba (0, 1, 0, 1) -todo probe (1, 0) rgba (1, 1, 0, 1) -todo probe (0, 1) rgba (0, 2, 0, 1) -todo probe (1, 1) rgba (1, 2, 0, 1) +draw quad +probe (0, 0) rgba (0, 1, 0, 1) +probe (1, 0) rgba (1, 1, 0, 1) +probe (0, 1) rgba (0, 2, 0, 1) +probe (1, 1) rgba (1, 2, 0, 1)
-[pixel shader todo] +[pixel shader] Texture2D t;
float4 main(float4 pos : sv_position) : sv_target @@ -35,11 +35,11 @@ float4 main(float4 pos : sv_position) : sv_target
[test] -todo draw quad -todo probe (3, 0) rgba (1, 0, 0, 1) -todo probe (4, 0) rgba (2, 0, 0, 1) -todo probe (3, 1) rgba (1, 1, 0, 1) -todo probe (4, 1) rgba (2, 1, 0, 1) +draw quad +probe (3, 0) rgba (1, 0, 0, 1) +probe (4, 0) rgba (2, 0, 0, 1) +probe (3, 1) rgba (1, 1, 0, 1) +probe (4, 1) rgba (2, 1, 0, 1)
[pixel shader fail]