From: Francisco Casas fcasas@codeweavers.com
There are infinite loops that can still happen in copy-prop for swizzles, like in the new added test. The previous solution is not strong enough to solve them.
Consider the following sequence of instructions:
1 : A 2 : B = @1 3 : B 4 : A = @3 5 : @1.x
Current copy-prop would replace 5 so it points to @3 now:
1 : A 2 : B = @1 3 : B 4 : A = @3 5 : @3.x
But in the next iteration it would make it point back to @1, keeping it spinning infinitively.
The solution is to index the instructions and don't replace the swizzle if the new load happens after the current load. --- libs/vkd3d-shader/hlsl_codegen.c | 20 +++++++++++++------- tests/hlsl/hard-copy-prop.shader_test | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 6ff6ba755..51f333516 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1204,7 +1204,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s }
/* Allocate a unique, ordered index to each instruction, which will be used for - * computing liveness ranges. + * copy propagation and computing liveness ranges. * Index 0 means unused; index 1 means function entry, so start at 2. */ static unsigned int index_instructions(struct hlsl_block *block, unsigned int index) { @@ -1503,15 +1503,15 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, ret_swizzle |= value->component << HLSL_SWIZZLE_SHIFT(i); }
- /* When 'load' is not the same as 'instr', there is the possibility of 'load' itself to be - * 'new_instr'. In this case, the replacement is not necessary and we have to return false - * to avoid doing copy-prop forever. */ - if (new_instr == &load->node) + /* When 'load' is not the same as 'instr', there is the possibility of 'load' to be before in + * the program than 'new_instr', or even be equal to 'new_instr'. In this case, the replacement + * is not helpful, so we have to return false to avoid doing copy-prop forever. */ + if (load->node.index <= new_instr->index) return false;
- TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", + TRACE("Load from %s[%u-%u]%s for instruction %p propagated as instruction %p%s.\n", var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), - new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); + instr, new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count));
if (instr->data_type->class != HLSL_CLASS_OBJECT) { @@ -1891,6 +1891,8 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc struct copy_propagation_state state; bool progress;
+ index_instructions(block, 2); + copy_propagation_state_init(ctx, &state, NULL);
progress = copy_propagation_transform_block(ctx, block, &state); @@ -4884,6 +4886,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); do { + + if (TRACE_ON()) + rb_for_each_entry(&ctx->functions, dump_function, ctx); + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); progress |= hlsl_copy_propagation_execute(ctx, body); diff --git a/tests/hlsl/hard-copy-prop.shader_test b/tests/hlsl/hard-copy-prop.shader_test index fd6d588d5..5b8604b82 100644 --- a/tests/hlsl/hard-copy-prop.shader_test +++ b/tests/hlsl/hard-copy-prop.shader_test @@ -23,3 +23,27 @@ uniform 0 float 1.0 draw quad probe all rgba (20.0, 20.0, 20.0, 20.0)
+ +[pixel shader] +float cond; + +float4 main() : sv_target +{ + float2 r = {3, 4}; + + // invalidate r + if (cond) + r = float2(30, 40); + + r = r; + r = float2(1, r.y); + + return float4(r, 0, 0); +} +[test] +uniform 0 float 0.0 +draw quad +probe all rgba (1.0, 4.0, 0.0, 0.0) +uniform 0 float 1.0 +draw quad +probe all rgba (1.0, 40.0, 0.0, 0.0)