Otherwise, it is possible that the register used by the temp is overridden by a subsequent instruction within the same loop.
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/loop.shader_test | 43 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 tests/loop.shader_test
diff --git a/Makefile.am b/Makefile.am index e06d0eeb..a5b362e1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -136,6 +136,7 @@ vkd3d_shader_tests = \ tests/load-level.shader_test \ tests/log.shader_test \ tests/logic-operations.shader_test \ + tests/loop.shader_test \ tests/majority-syntax.shader_test \ tests/math.shader_test \ tests/matrix-semantics.shader_test \ diff --git a/tests/loop.shader_test b/tests/loop.shader_test new file mode 100644 index 00000000..08cf7e55 --- /dev/null +++ b/tests/loop.shader_test @@ -0,0 +1,43 @@ +[pixel shader] +float a; + +float4 main() : sv_target +{ + float res = 0; + + for (int i = 0; i < 10; ++i) + { + res += a; + // The temp copy of 'a' must reserve its registers for the rest of the loop. + // It shall not be overwritten. + } + return res; +} + +[test] +uniform 0 float 5.0 +draw quad +todo probe all rgba (50.0, 50.0, 50.0, 50.0) + + +[pixel shader] +float a; + +float4 main() : sv_target +{ + float res = 0; + + for (int i = 0; i < 10; ++i) + { + res += a; + // The temp copy of 'a' must reserve its registers for the rest of the loop. + // It shall not be overwritten. + i++; + } + return res; +} + +[test] +uniform 0 float 4.0 +draw quad +todo probe all rgba (20.0, 20.0, 20.0, 20.0)
From: Francisco Casas fcasas@codeweavers.com
Otherwise, it is possible that the register used by the temp is overridden by a subsequent instruction within the same loop. --- libs/vkd3d-shader/hlsl_codegen.c | 48 ++++++++++++++++---------------- tests/loop.shader_test | 4 +-- tests/return.shader_test | 6 ++-- 3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 638dab6e..bb7d4bd8 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2587,9 +2587,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx)
/* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend - * to at least the range of the entire loop. Note that we don't need to do this - * for anonymous nodes, since there's currently no way to use a node which was - * calculated in an earlier iteration of the loop. */ + * to at least the range of the entire loop. We also need to do this for + * anonymous nodes, so that their temp register is protected from being + * overwritten by subsequent instructions before the next iteration. */ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) { struct hlsl_ir_node *instr; @@ -2597,7 +2597,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { - const unsigned int var_last_read = loop_last ? max(instr->index, loop_last) : instr->index; + const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index;
switch (instr->type) { @@ -2612,9 +2612,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; - store->rhs.node->last_read = instr->index; + store->rhs.node->last_read = last_read; if (store->lhs.offset.node) - store->lhs.offset.node->last_read = instr->index; + store->lhs.offset.node->last_read = last_read; break; } case HLSL_IR_EXPR: @@ -2623,7 +2623,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop unsigned int i;
for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) - expr->operands[i].node->last_read = instr->index; + expr->operands[i].node->last_read = last_read; break; } case HLSL_IR_IF: @@ -2632,7 +2632,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
compute_liveness_recurse(&iff->then_block, loop_first, loop_last); compute_liveness_recurse(&iff->else_block, loop_first, loop_last); - iff->condition.node->last_read = instr->index; + iff->condition.node->last_read = last_read; break; } case HLSL_IR_LOAD: @@ -2640,9 +2640,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_load *load = hlsl_ir_load(instr);
var = load->src.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->src.offset.node) - load->src.offset.node->last_read = instr->index; + load->src.offset.node->last_read = last_read; break; } case HLSL_IR_LOOP: @@ -2658,22 +2658,22 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
var = load->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->resource.offset.node) - load->resource.offset.node->last_read = instr->index; + load->resource.offset.node->last_read = last_read;
if ((var = load->sampler.var)) { - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->sampler.offset.node) - load->sampler.offset.node->last_read = instr->index; + load->sampler.offset.node->last_read = last_read; }
- load->coords.node->last_read = instr->index; + load->coords.node->last_read = last_read; if (load->texel_offset.node) - load->texel_offset.node->last_read = instr->index; + load->texel_offset.node->last_read = last_read; if (load->lod.node) - load->lod.node->last_read = instr->index; + load->lod.node->last_read = last_read; break; } case HLSL_IR_RESOURCE_STORE: @@ -2681,26 +2681,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
var = store->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (store->resource.offset.node) - store->resource.offset.node->last_read = instr->index; - store->coords.node->last_read = instr->index; - store->value.node->last_read = instr->index; + store->resource.offset.node->last_read = last_read; + store->coords.node->last_read = last_read; + store->value.node->last_read = last_read; break; } case HLSL_IR_SWIZZLE: { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
- swizzle->val.node->last_read = instr->index; + swizzle->val.node->last_read = last_read; break; } case HLSL_IR_INDEX: { struct hlsl_ir_index *index = hlsl_ir_index(instr);
- index->val.node->last_read = instr->index; - index->idx.node->last_read = instr->index; + index->val.node->last_read = last_read; + index->idx.node->last_read = last_read; break; } case HLSL_IR_CONSTANT: diff --git a/tests/loop.shader_test b/tests/loop.shader_test index 08cf7e55..75a31f7a 100644 --- a/tests/loop.shader_test +++ b/tests/loop.shader_test @@ -17,7 +17,7 @@ float4 main() : sv_target [test] uniform 0 float 5.0 draw quad -todo probe all rgba (50.0, 50.0, 50.0, 50.0) +probe all rgba (50.0, 50.0, 50.0, 50.0)
[pixel shader] @@ -40,4 +40,4 @@ float4 main() : sv_target [test] uniform 0 float 4.0 draw quad -todo probe all rgba (20.0, 20.0, 20.0, 20.0) +probe all rgba (20.0, 20.0, 20.0, 20.0) diff --git a/tests/return.shader_test b/tests/return.shader_test index 3c9ea611..9f800d1a 100644 --- a/tests/return.shader_test +++ b/tests/return.shader_test @@ -175,15 +175,15 @@ probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 0 float 0.3 draw quad -todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1 +probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float 0.7 draw quad -todo probe all rgba (0.8, 0.8, 0.8, 0.8) 1 +probe all rgba (0.8, 0.8, 0.8, 0.8) 1
uniform 0 float 0.9 draw quad -todo probe all rgba (0.9, 0.9, 0.9, 0.9) 1 +probe all rgba (0.9, 0.9, 0.9, 0.9) 1
[pixel shader]
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
/* Compute the earliest and latest liveness for each variable. In the case that
- a variable is accessed inside of a loop, we promote its liveness to extend
- to at least the range of the entire loop. Note that we don't need to do this
- for anonymous nodes, since there's currently no way to use a node which was
- calculated in an earlier iteration of the loop. */
- to at least the range of the entire loop. We also need to do this for
- anonymous nodes, so that their temp register is protected from being
- overwritten by subsequent instructions before the next iteration. */
I want to nitpick this wording a bit. I think when I wrote the original code I was only thinking of nodes produced inside of a loop. That's still what I think of when I see this comment (or commit message), and I still think that doesn't actually make conceptual sense. Rather, what does make sense, and what this patch addresses is the case where a node produced before the loop is used inside the loop.
I want to nitpick this wording a bit. I think when I wrote the original code I was only thinking of nodes produced inside of a loop.
That makes sense. @giomasce said that this bug was probably a result of implementing copy propagation, since, after that, nodes outside the loop could be read inside the loop.
That's still what I think of when I see this comment (or commit message), and I still think that doesn't actually make conceptual sense. Rather, what does make sense, and what this patch addresses is the case where a node produced before the loop is used inside the loop.
How about: "vkd3d-shader/hlsl: Extend the liveness of nodes produced outside loops."
```c /* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend * to at least the range of the entire loop. We also do this for nodes produced * before the loop, so that their temp register is protected from being * overriden during the whole loop. */ ```
Now I think we could extend the liveness only if `instr->index < loop_start`.
/* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend * to at least the range of the entire loop. We also do this for nodes produced * before the loop, so that their temp register is protected from being * overriden during the whole loop. */
Yeah, I like that better.
Now I think we could extend the liveness only if `instr->index < loop_start`.
I believe you're right (and in the interest of making more registers available it's probably a good idea).
On Tue May 16 17:54:05 2023 +0000, Zebediah Figura wrote:
/* Compute the earliest and latest liveness for each variable. In the
case that
- a variable is accessed inside of a loop, we promote its liveness to extend
- to at least the range of the entire loop. We also do this for nodes produced
- before the loop, so that their temp register is protected from being
- overriden during the whole loop. */
Yeah, I like that better.
Now I think we could extend the liveness only if `instr->index < loop_start`.
I believe you're right (and in the interest of making more registers available it's probably a good idea).
Okay, I realized we can't do the `instr->index < loop_start` trick (at least, not without rethinking the implementation), because currently `loop_start` holds the start of the outermost loop, so consider:
``` 02 for(;;) 03 { 04 # instr origins here. 05 for(;;) 06 { 07 # instr is accessed here. 08 # some more operations. 09 } 10 } ```
In this case it doesn't happen that `instr->index < loop_start`, because loop_start would be 02. But its register must be protected during the whole duration of the inner loop.
For now I will just push change the comment and the commit message.