Module: vkd3d Branch: master Commit: c2a7a40d3a3b16d8ac37ecb40db260a4f092f7f8 URL: https://gitlab.winehq.org/wine/vkd3d/-/commit/c2a7a40d3a3b16d8ac37ecb40db260a4f092f7f8
Author: Francisco Casas fcasas@codeweavers.com Date: Thu Nov 17 17:49:28 2022 -0300
vkd3d-shader/hlsl: Replace loads with constants in copy prop.
If a hlsl_ir_load loads a variable whose components are stored from different instructions, copy propagation doesn't replace it.
But if all these instructions are constants (which is currently the case for value constructors), the load can be replaced with a constant value, which is what some other instructions expect, e.g. texel_offsets when using aoffimmi modifiers.
For instance, this shader:
```
sampler s;
Texture2D t;

float4 main() : sv_target
{
    return t.Gather(s, float2(0.6, 0.6), int2(0, 0));
}
```
results in the following IR before applying the patch:
```
  2:      float | 6.00000024e-01
  3:      float | 6.00000024e-01
  4:       uint | 0
  5:            | = (<constructor-2>[@4].x @2)
  6:       uint | 1
  7:            | = (<constructor-2>[@6].x @3)
  8:     float2 | <constructor-2>
  9:        int | 0
 10:        int | 0
 11:       uint | 0
 12:            | = (<constructor-5>[@11].x @9)
 13:       uint | 1
 14:            | = (<constructor-5>[@13].x @10)
 15:       int2 | <constructor-5>
 16:     float4 | gather_red(resource = t, sampler = s, coords = @8, offset = @15)
 17:            | return
 18:            | = (<output-sv_target0> @16)
```
and this IR afterwards:
```
  2:     float2 | {6.00000024e-01 6.00000024e-01 }
  3:       int2 | {0 0 }
  4:     float4 | gather_red(resource = t, sampler = s, coords = @2, offset = @3)
  5:            | return
  6:            | = (<output-sv_target0> @4)
```
---
libs/vkd3d-shader/hlsl_codegen.c | 85 ++++++++++++++++++++++++++++++ tests/hlsl-initializer-objects.shader_test | 8 +-- tests/object-references.shader_test | 6 +-- tests/sampler-offset.shader_test | 12 ++--- tests/texture-load-offset.shader_test | 24 ++++----- 5 files changed, 110 insertions(+), 25 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index dcb945e7..da5b4264 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -543,6 +543,52 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v return false; }
+/* + * Copy propagation. The basic idea is to recognize instruction sequences of the + * form: + * + * 2: <any instruction> + * 3: v = @2 + * 4: load(v) + * + * and replace the load (@4) with the original instruction (@2). + * This works for multiple components, even if they're written using separate + * store instructions, as long as the rhs is the same in every case. This basic + * detection is implemented by copy_propagation_replace_with_single_instr(). + * + * We use the same infrastructure to implement a more specialized + * transformation. We recognize sequences of the form: + * + * 2: 123 + * 3: var.x = @2 + * 4: 345 + * 5: var.y = @4 + * 6: load(var.xy) + * + * where the load (@6) originates from different sources but that are constant, + * and transform it into a single constant vector. This latter pass is done + * by copy_propagation_replace_with_constant_vector(). + * + * This is a specialized form of vectorization, and begs the question: why does + * the load need to be involved? Can we just vectorize the stores into a single + * instruction, and then use "normal" copy-prop to convert that into a single + * vector? + * + * In general, the answer is yes, but there is a special case which necessitates + * the use of this transformation: non-uniform control flow. Copy-prop can act + * across some control flow, and in cases like the following: + * + * 2: 123 + * 3: var.x = @2 + * 4: if (...) + * 5: 456 + * 6: var.y = @5 + * 7: load(var.xy) + * + * we can copy-prop the load (@7) into a constant vector {123, 456}, but we + * cannot easily vectorize the stores @3 and @6. + */ + enum copy_propagation_value_state { VALUE_STATE_NOT_WRITTEN = 0, @@ -772,6 +818,42 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, return true; }
+static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, + const struct copy_propagation_state *state, struct hlsl_ir_load *load) +{ + const struct hlsl_ir_var *var = load->src.var; + union hlsl_constant_value values[4] = {0}; + struct hlsl_ir_node *instr = &load->node; + struct hlsl_ir_constant *cons; + unsigned int start, count, i; + + if (!hlsl_component_index_range_from_deref(ctx, &load->src, &start, &count)) + return false; + + for (i = 0; i < count; ++i) + { + struct copy_propagation_value *value = copy_propagation_get_value(state, var, start + i); + + if (!value || value->node->type != HLSL_IR_CONSTANT) + return false; + + values[i] = hlsl_ir_constant(value->node)->value[value->component]; + } + + if (!(cons = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) + return false; + cons->value[0] = values[0]; + cons->value[1] = values[1]; + cons->value[2] = values[2]; + cons->value[3] = values[3]; + list_add_before(&instr->entry, &cons->node.entry); + + TRACE("Load from %s[%u-%u] turned into a constant %p.\n", var->name, start, start + count, cons); + + hlsl_replace_node(instr, &cons->node); + return true; +} + static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, struct copy_propagation_state *state) { @@ -792,6 +874,9 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, return false; }
+ if (copy_propagation_replace_with_constant_vector(ctx, state, load)) + return true; + if (copy_propagation_replace_with_single_instr(ctx, state, load)) return true;
diff --git a/tests/hlsl-initializer-objects.shader_test b/tests/hlsl-initializer-objects.shader_test index d40ede46..d9c0bc91 100644 --- a/tests/hlsl-initializer-objects.shader_test +++ b/tests/hlsl-initializer-objects.shader_test @@ -29,7 +29,7 @@ draw quad probe all rgba (0.2, 0.2, 0.2, 0.1)
-[pixel shader todo] +[pixel shader] Texture2D tex;
struct foo @@ -48,11 +48,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (31.1, 41.1, 51.1, 61.1) 1 +draw quad +probe all rgba (31.1, 41.1, 51.1, 61.1) 1
-[pixel shader todo] +[pixel shader] Texture2D tex1; Texture2D tex2;
diff --git a/tests/object-references.shader_test b/tests/object-references.shader_test index 67f86bc6..5e8e2641 100644 --- a/tests/object-references.shader_test +++ b/tests/object-references.shader_test @@ -132,7 +132,7 @@ float4 main() : sv_target }
-[pixel shader todo] +[pixel shader] Texture2D tex; uniform float f;
@@ -153,8 +153,8 @@ float4 main() : sv_target
[test] uniform 0 float 10.0 -todo draw quad -todo probe (0, 0) rgba (11.0, 12.0, 13.0, 11.0) +draw quad +probe (0, 0) rgba (11.0, 12.0, 13.0, 11.0)
[pixel shader fail] diff --git a/tests/sampler-offset.shader_test b/tests/sampler-offset.shader_test index 2aa8f9b3..6f8357df 100644 --- a/tests/sampler-offset.shader_test +++ b/tests/sampler-offset.shader_test @@ -12,7 +12,7 @@ size (3, 3) 0.0 0.2 0.0 0.4 0.1 0.2 0.5 0.0 0.2 0.2 0.0 0.4
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -22,11 +22,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.1, 0.2, 0.5, 0.0)
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -36,11 +36,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.2, 0.2, 0.0, 0.4)
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -50,5 +50,5 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.0, 0.2, 0.0, 0.4) diff --git a/tests/texture-load-offset.shader_test b/tests/texture-load-offset.shader_test index 6d732190..52b6a5f9 100644 --- a/tests/texture-load-offset.shader_test +++ b/tests/texture-load-offset.shader_test @@ -8,7 +8,7 @@ size (3, 3) 0 2 0 1 1 2 0 1 2 2 0 1
-[pixel shader todo] +[pixel shader] Texture2D t;
float4 main(float4 pos : sv_position) : sv_target @@ -18,14 +18,14 @@ float4 main(float4 pos : sv_position) : sv_target
[test] -todo draw quad -todo probe (0, 0) rgba (0, 1, 0, 1) -todo probe (1, 0) rgba (1, 1, 0, 1) -todo probe (0, 1) rgba (0, 2, 0, 1) -todo probe (1, 1) rgba (1, 2, 0, 1) +draw quad +probe (0, 0) rgba (0, 1, 0, 1) +probe (1, 0) rgba (1, 1, 0, 1) +probe (0, 1) rgba (0, 2, 0, 1) +probe (1, 1) rgba (1, 2, 0, 1)
-[pixel shader todo] +[pixel shader] Texture2D t;
float4 main(float4 pos : sv_position) : sv_target @@ -35,11 +35,11 @@ float4 main(float4 pos : sv_position) : sv_target
[test] -todo draw quad -todo probe (3, 0) rgba (1, 0, 0, 1) -todo probe (4, 0) rgba (2, 0, 0, 1) -todo probe (3, 1) rgba (1, 1, 0, 1) -todo probe (4, 1) rgba (2, 1, 0, 1) +draw quad +probe (3, 0) rgba (1, 0, 0, 1) +probe (4, 0) rgba (2, 0, 0, 1) +probe (3, 1) rgba (1, 1, 0, 1) +probe (4, 1) rgba (2, 1, 0, 1)
[pixel shader fail]