From: Francisco Casas fcasas@codeweavers.com
Current copy-prop of swizzle instructions can result in infinite loops, as with the included test before this commit.
Consider the following sequence of instructions where a load is stored in the same variable it loads:
    1 : A
    2 : A = @1
    3 : @1.x
In this case @3 would call copy_propagation_get_value() on A.x and would get @1, without detecting that @1 is in fact the same node it is already using as swizzle->value. So it would return true, keeping copy-prop spinning.
To avoid this, we check that the replacement instruction is not the same as the load instruction the swizzle is already using (swizzle->value). --- Makefile.am | 1 + libs/vkd3d-shader/hlsl_codegen.c | 13 ++++++++++--- tests/hlsl/hard-copy-prop.shader_test | 25 +++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 tests/hlsl/hard-copy-prop.shader_test
diff --git a/Makefile.am b/Makefile.am index 2666194a6..5d73affa9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -102,6 +102,7 @@ vkd3d_shader_tests = \ tests/hlsl/gather.shader_test \ tests/hlsl/getdimensions.shader_test \ tests/hlsl/half.shader_test \ + tests/hlsl/hard-copy-prop.shader_test \ tests/hlsl/initializer-flatten.shader_test \ tests/hlsl/initializer-implicit-array.shader_test \ tests/hlsl/initializer-invalid-arg-count.shader_test \ diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5a70878bc..e6b5e20cd 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1433,10 +1433,11 @@ static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, }
static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, - const struct copy_propagation_state *state, const struct hlsl_deref *deref, + const struct copy_propagation_state *state, const struct hlsl_ir_load *load, unsigned int swizzle, struct hlsl_ir_node *instr) { const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); + const struct hlsl_deref *deref = &load->src; const struct hlsl_ir_var *var = deref->var; struct hlsl_ir_node *new_instr = NULL; unsigned int start, count, i; @@ -1465,6 +1466,12 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, ret_swizzle |= value->component << HLSL_SWIZZLE_SHIFT(i); }
+ /* When 'load' is not the same as 'instr', there is the possibility of 'load' itself to be + * 'new_instr'. In this case, the replacement is not necessary and we have to return false + * to avoid doing copy-prop forever. */ + if (new_instr == &load->node) + return false; + TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); @@ -1541,7 +1548,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, if (copy_propagation_replace_with_constant_vector(ctx, state, &load->src, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) return true;
- if (copy_propagation_replace_with_single_instr(ctx, state, &load->src, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) + if (copy_propagation_replace_with_single_instr(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) return true;
return false; @@ -1559,7 +1566,7 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, if (copy_propagation_replace_with_constant_vector(ctx, state, &load->src, swizzle->swizzle, &swizzle->node)) return true;
- if (copy_propagation_replace_with_single_instr(ctx, state, &load->src, swizzle->swizzle, &swizzle->node)) + if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->swizzle, &swizzle->node)) return true;
return false; diff --git a/tests/hlsl/hard-copy-prop.shader_test b/tests/hlsl/hard-copy-prop.shader_test new file mode 100644 index 000000000..fd6d588d5 --- /dev/null +++ b/tests/hlsl/hard-copy-prop.shader_test @@ -0,0 +1,25 @@ +[pixel shader] +float cond; + +float4 main() : sv_target +{ + float2 r = {1, 2}; + float2 tmp; + + // invalidate r + if (cond) + r = float2(10, 20); + + tmp = r; + r = tmp; + return r.y; +} + +[test] +uniform 0 float 0.0 +draw quad +probe all rgba (2.0, 2.0, 2.0, 2.0) +uniform 0 float 1.0 +draw quad +probe all rgba (20.0, 20.0, 20.0, 20.0) +