Otherwise, it is possible that the register used by the temp is overridden by a subsequent instruction within the same loop.
-- v2: vkd3d-shader/hlsl: Extend the liveness of nodes produced outside loops.
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/loop.shader_test | 43 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 tests/loop.shader_test
diff --git a/Makefile.am b/Makefile.am index e06d0eeb..a5b362e1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -136,6 +136,7 @@ vkd3d_shader_tests = \ tests/load-level.shader_test \ tests/log.shader_test \ tests/logic-operations.shader_test \ + tests/loop.shader_test \ tests/majority-syntax.shader_test \ tests/math.shader_test \ tests/matrix-semantics.shader_test \ diff --git a/tests/loop.shader_test b/tests/loop.shader_test new file mode 100644 index 00000000..08cf7e55 --- /dev/null +++ b/tests/loop.shader_test @@ -0,0 +1,43 @@ +[pixel shader] +float a; + +float4 main() : sv_target +{ + float res = 0; + + for (int i = 0; i < 10; ++i) + { + res += a; + // The temp copy of 'a' must reserve its registers for the rest of the loop. + // It shall not be overwritten. + } + return res; +} + +[test] +uniform 0 float 5.0 +draw quad +todo probe all rgba (50.0, 50.0, 50.0, 50.0) + + +[pixel shader] +float a; + +float4 main() : sv_target +{ + float res = 0; + + for (int i = 0; i < 10; ++i) + { + res += a; + // The temp copy of 'a' must reserve its registers for the rest of the loop. + // It shall not be overwritten. + i++; + } + return res; +} + +[test] +uniform 0 float 4.0 +draw quad +todo probe all rgba (20.0, 20.0, 20.0, 20.0)
From: Francisco Casas fcasas@codeweavers.com
Otherwise, it is possible that the register used by the temp is overridden by a subsequent instruction within the same loop. --- libs/vkd3d-shader/hlsl_codegen.c | 48 ++++++++++++++++---------------- tests/loop.shader_test | 4 +-- tests/return.shader_test | 6 ++-- 3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 638dab6e..5024a969 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2587,9 +2587,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx)
/* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend - * to at least the range of the entire loop. Note that we don't need to do this - * for anonymous nodes, since there's currently no way to use a node which was - * calculated in an earlier iteration of the loop. */ + * to at least the range of the entire loop. We also do this for nodes, so that + * nodes produced before the loop have their temp register protected from being + * overridden after the last read within an iteration. */ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) { struct hlsl_ir_node *instr; @@ -2597,7 +2597,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { - const unsigned int var_last_read = loop_last ? max(instr->index, loop_last) : instr->index; + const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index;
switch (instr->type) { @@ -2612,9 +2612,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; - store->rhs.node->last_read = instr->index; + store->rhs.node->last_read = last_read; if (store->lhs.offset.node) - store->lhs.offset.node->last_read = instr->index; + store->lhs.offset.node->last_read = last_read; break; } case HLSL_IR_EXPR: @@ -2623,7 +2623,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop unsigned int i;
for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) - expr->operands[i].node->last_read = instr->index; + expr->operands[i].node->last_read = last_read; break; } case HLSL_IR_IF: @@ -2632,7 +2632,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
compute_liveness_recurse(&iff->then_block, loop_first, loop_last); compute_liveness_recurse(&iff->else_block, loop_first, loop_last); - iff->condition.node->last_read = instr->index; + iff->condition.node->last_read = last_read; break; } case HLSL_IR_LOAD: @@ -2640,9 +2640,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_load *load = hlsl_ir_load(instr);
var = load->src.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->src.offset.node) - load->src.offset.node->last_read = instr->index; + load->src.offset.node->last_read = last_read; break; } case HLSL_IR_LOOP: @@ -2658,22 +2658,22 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
var = load->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->resource.offset.node) - load->resource.offset.node->last_read = instr->index; + load->resource.offset.node->last_read = last_read;
if ((var = load->sampler.var)) { - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->sampler.offset.node) - load->sampler.offset.node->last_read = instr->index; + load->sampler.offset.node->last_read = last_read; }
- load->coords.node->last_read = instr->index; + load->coords.node->last_read = last_read; if (load->texel_offset.node) - load->texel_offset.node->last_read = instr->index; + load->texel_offset.node->last_read = last_read; if (load->lod.node) - load->lod.node->last_read = instr->index; + load->lod.node->last_read = last_read; break; } case HLSL_IR_RESOURCE_STORE: @@ -2681,26 +2681,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
var = store->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (store->resource.offset.node) - store->resource.offset.node->last_read = instr->index; - store->coords.node->last_read = instr->index; - store->value.node->last_read = instr->index; + store->resource.offset.node->last_read = last_read; + store->coords.node->last_read = last_read; + store->value.node->last_read = last_read; break; } case HLSL_IR_SWIZZLE: { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
- swizzle->val.node->last_read = instr->index; + swizzle->val.node->last_read = last_read; break; } case HLSL_IR_INDEX: { struct hlsl_ir_index *index = hlsl_ir_index(instr);
- index->val.node->last_read = instr->index; - index->idx.node->last_read = instr->index; + index->val.node->last_read = last_read; + index->idx.node->last_read = last_read; break; } case HLSL_IR_CONSTANT: diff --git a/tests/loop.shader_test b/tests/loop.shader_test index 08cf7e55..75a31f7a 100644 --- a/tests/loop.shader_test +++ b/tests/loop.shader_test @@ -17,7 +17,7 @@ float4 main() : sv_target [test] uniform 0 float 5.0 draw quad -todo probe all rgba (50.0, 50.0, 50.0, 50.0) +probe all rgba (50.0, 50.0, 50.0, 50.0)
[pixel shader] @@ -40,4 +40,4 @@ float4 main() : sv_target [test] uniform 0 float 4.0 draw quad -todo probe all rgba (20.0, 20.0, 20.0, 20.0) +probe all rgba (20.0, 20.0, 20.0, 20.0) diff --git a/tests/return.shader_test b/tests/return.shader_test index 3c9ea611..9f800d1a 100644 --- a/tests/return.shader_test +++ b/tests/return.shader_test @@ -175,15 +175,15 @@ probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 0 float 0.3 draw quad -todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1 +probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float 0.7 draw quad -todo probe all rgba (0.8, 0.8, 0.8, 0.8) 1 +probe all rgba (0.8, 0.8, 0.8, 0.8) 1
uniform 0 float 0.9 draw quad -todo probe all rgba (0.9, 0.9, 0.9, 0.9) 1 +probe all rgba (0.9, 0.9, 0.9, 0.9) 1
[pixel shader]
This merge request was approved by Giovanni Mascellani.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) {
const unsigned int var_last_read = loop_last ? max(instr->index, loop_last) : instr->index;
const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index;
That's not really onto this MR, but if `loop_last` is not zero, isn't `max(instr->index, loop_last) == loop_last`, given that we're considering an instruction inside that loop?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index;
(and the same thing, in reverse, happens here)
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index;
store->rhs.node->last_read = instr->index;
store->rhs.node->last_read = last_read; if (store->lhs.offset.node)
Again not onto this MR, but maybe we could assert here that the path in all `hlsl_deref`'s is null, mostly as a justification to the reader possibly questioning why we're not extending the `last_read` there too.
Okay, I realized we can't do the `instr->index < loop_start` trick (at least, not without rethinking the implementation), because currently `loop_start` holds the start of the outermost loop, so consider:
Right. I guess we'd have to keep a stack of the starting index of all the loops we're currently inside, and extend until the end of the outermost loop that doesn't contain the originating instruction.
I don't think there is a hard need to scramble over that, for the moment.
The MR looks good. A few thoughts came to my mind while reading it, but they're not required for this MR to be accepted.
This merge request was approved by Zebediah Figura.
On Wed May 17 11:44:15 2023 +0000, Giovanni Mascellani wrote:
That's not really onto this MR, but if `loop_last` is not zero, isn't `max(instr->index, loop_last) == loop_last`, given that we're considering an instruction inside that loop?
Should be, yes.
On Wed May 17 11:44:17 2023 +0000, Giovanni Mascellani wrote:
Again not onto this MR, but maybe we could assert here that the path in all `hlsl_deref`'s is null, mostly as a justification to the reader possibly questioning why we're not extending the `last_read` there too.
Yeah, although of course the preferred solution is to stop using the "offset" field :-)
This will need to be rebased after today's commits.