Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- A new var_def is now initialized to the content of the same var_def in earlier stack frames.
I still believe this is more complicated than it should be (i.e., harder to check and therefore more prone to bugs), but maybe it's correct anyway. --- libs/vkd3d-shader/hlsl_codegen.c | 148 +++++++++++++++++++++++++++---- 1 file changed, 133 insertions(+), 15 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 75716bdf..d09a6a3b 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -290,6 +290,7 @@ struct copy_propagation_var_def struct copy_propagation_state { struct rb_tree var_defs; + struct copy_propagation_state *parent; };
static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) @@ -308,9 +309,12 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte }
static struct copy_propagation_var_def *copy_propagation_get_var_def(struct copy_propagation_state *state, - struct hlsl_ir_var *var) + const struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->var_defs, var); + struct rb_entry *entry; + + while (state && !(entry = rb_get(&state->var_defs, var))) + state = state->parent;
if (entry) return RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); @@ -321,8 +325,8 @@ static struct copy_propagation_var_def *copy_propagation_get_var_def(struct copy static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { + struct copy_propagation_var_def *var_def, *prev_var_def; struct rb_entry *entry = rb_get(&state->var_defs, var); - struct copy_propagation_var_def *var_def; int res;
if (entry) @@ -331,6 +335,10 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h if (!(var_def = hlsl_alloc(ctx, offsetof(struct copy_propagation_var_def, values[var->data_type->reg_size])))) return NULL;
+ prev_var_def = copy_propagation_get_var_def(state, var); + if (prev_var_def) + memcpy(var_def->values, prev_var_def->values, var->data_type->reg_size * sizeof(*var_def->values)); + var_def->var = var;
res = rb_put(&state->var_defs, var, &var_def->entry); @@ -339,12 +347,6 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h return var_def; }
-static void copy_propagation_invalidate_whole_variable(struct copy_propagation_var_def *var_def) -{ - TRACE("Invalidate variable %s.\n", var_def->var->name); - memset(var_def->values, 0, sizeof(*var_def->values) * var_def->var->data_type->reg_size); -} - static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, unsigned int offset, unsigned char writemask, struct hlsl_ir_node *node) { @@ -354,7 +356,7 @@ static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, { if (writemask & (1u << i)) { - TRACE("Variable %s[%u] is written by instruction %p%s.\n", + TRACE("Variable %s[%u] updated with value from %p%s.\n", var_def->var->name, offset + i, node, debug_hlsl_writemask(1u << i)); var_def->values[offset + i].node = node; var_def->values[offset + i].component = j++; @@ -362,6 +364,99 @@ static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, } }
+static void copy_propagation_invalidate_whole_variable(struct copy_propagation_state *state, + struct copy_propagation_var_def *var_def) +{ + const struct hlsl_ir_var *var = var_def->var; + + TRACE("Invalidate variable %s.\n", var->name); + while (state) + { + if ((var_def = copy_propagation_get_var_def(state, var))) + memset(var_def->values, 0, sizeof(*var_def->values) * var_def->var->data_type->reg_size); + state = state->parent; + } +} + +static void copy_propagation_invalidate_variable_partial(struct copy_propagation_state *state, + struct copy_propagation_var_def *var_def, unsigned int offset, unsigned char writemask) +{ + const struct hlsl_ir_var *var = var_def->var; + + TRACE("Invalidate variable %s[%u]%s.\n", var->name, offset, debug_hlsl_writemask(writemask)); + while (state) + { + if ((var_def = copy_propagation_get_var_def(state, var))) + copy_propagation_set_value(var_def, offset, writemask, NULL); + state = state->parent; + } +} + +static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, + struct hlsl_block *block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); + struct copy_propagation_var_def *var_def; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + if (!(var_def = copy_propagation_get_var_def(state, var))) + continue; + + if (hlsl_offset_from_deref(lhs, &offset)) + copy_propagation_invalidate_variable_partial(state, var_def, offset, store->writemask); + else + copy_propagation_invalidate_whole_variable(state, var_def); + + break; + } + + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + + break; + } + + default: + break; + } + } +} + +static void copy_propagation_state_destroy(struct copy_propagation_state *state) +{ + rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL); +} + +static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct copy_propagation_state *state, + struct copy_propagation_state *parent) +{ + rb_init(&state->var_defs, copy_propagation_var_def_compare); + state->parent = parent; +} + static struct hlsl_ir_node *copy_propagation_compute_replacement(struct copy_propagation_var_def *var_def, unsigned int offset, unsigned int count, unsigned int *swizzle) { @@ -433,7 +528,30 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s if (hlsl_offset_from_deref(lhs, &offset)) copy_propagation_set_value(var_def, offset, store->writemask, store->rhs.node); else - copy_propagation_invalidate_whole_variable(var_def); + copy_propagation_invalidate_whole_variable(state, var_def); +} + +static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state); + +static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, + struct copy_propagation_state *state) +{ + struct copy_propagation_state inner_state; + bool progress = false; + + copy_propagation_state_init(ctx, &inner_state, state); + progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, &inner_state); + copy_propagation_state_destroy(&inner_state); + + copy_propagation_state_init(ctx, &inner_state, state); + progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, &inner_state); + copy_propagation_state_destroy(&inner_state); + + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + return progress; }
static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -455,8 +573,8 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b break;
case HLSL_IR_IF: - FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); - return progress; + progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state); + break;
case HLSL_IR_LOOP: FIXME("Copy propagation doesn't support loops yet, leaving.\n"); @@ -475,11 +593,11 @@ static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bl struct copy_propagation_state state; bool progress;
- rb_init(&state.var_defs, copy_propagation_var_def_compare); + copy_propagation_state_init(ctx, &state, NULL);
progress = copy_propagation_transform_block(ctx, block, &state);
- rb_destroy(&state.var_defs, copy_propagation_var_def_destroy, NULL); + copy_propagation_state_destroy(&state);
return progress; }
Signed-off-by: Matteo Bruni mbruni@codeweavers.com Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index d09a6a3b..e358bb14 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -554,6 +554,21 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if return progress; }
+static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, + struct copy_propagation_state *state) +{ + struct copy_propagation_state inner_state; + bool progress = false; + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + + copy_propagation_state_init(ctx, &inner_state, state); + progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state); + copy_propagation_state_destroy(&inner_state); + + return progress; +} + static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct copy_propagation_state *state) { @@ -577,8 +592,8 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b break;
case HLSL_IR_LOOP: - FIXME("Copy propagation doesn't support loops yet, leaving.\n"); - return progress; + progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state); + break;
default: break;
Signed-off-by: Matteo Bruni mbruni@codeweavers.com Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- Control flow reworked so that copy_propagation_analyze_load() is easier to read. --- libs/vkd3d-shader/hlsl_codegen.c | 71 +++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 10 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index e358bb14..882a546e 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -479,6 +479,48 @@ static struct hlsl_ir_node *copy_propagation_compute_replacement(struct copy_pro return node; }
+static struct hlsl_ir_node *copy_propagation_compute_constant(struct hlsl_ctx *ctx, + struct copy_propagation_var_def *variable, unsigned int offset, unsigned int count, + const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = HLSL_TYPE_VOID; + struct hlsl_ir_constant *constant = NULL; + unsigned int i; + + assert(offset + count <= variable->var->data_type->reg_size); + + for (i = 0; i < count; ++i) + { + struct hlsl_ir_node *store_node = variable->values[offset + i].node; + enum hlsl_base_type store_type; + + if (!store_node) + return NULL; + if (store_node->type != HLSL_IR_CONSTANT) + return NULL; + + store_type = store_node->data_type->base_type; + + if (type == HLSL_TYPE_VOID) + type = store_type; + else if (type != store_type) + return NULL; + } + + if (!(constant = hlsl_new_uint_constant(ctx, 0, *loc))) + return NULL; + constant->node.data_type = hlsl_get_vector_type(ctx, type, count); + + for (i = 0; i < count; ++i) + { + struct hlsl_ir_node *store_node = variable->values[offset + i].node; + + constant->value[i] = hlsl_ir_constant(store_node)->value[variable->values[offset + i].component]; + } + + return &constant->node; +} + static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, struct copy_propagation_state *state) { @@ -499,19 +541,28 @@ static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_l if (!(var_def = copy_propagation_get_var_def(state, var))) return false;
- if (!(new_node = copy_propagation_compute_replacement(var_def, offset, type->dimx, &swizzle))) + if ((new_node = copy_propagation_compute_constant(ctx, var_def, offset, type->dimx, &load->node.loc))) { - TRACE("No single source for propagating load from %s[%u-%u].\n", var->name, offset, offset + type->dimx); - return false; + TRACE("Load from %s[%d-%d] reconstructed as constant value.\n", + var->name, offset, offset + type->dimx); + list_add_before(&node->entry, &new_node->entry); + replace_node(node, new_node); + return true; }
- TRACE("Load from %s[%u-%u] propagated as instruction %p%s.\n", - var->name, offset, offset + type->dimx, new_node, debug_hlsl_swizzle(swizzle, 4)); - if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc))) - return false; - list_add_before(&node->entry, &swizzle_node->node.entry); - replace_node(node, &swizzle_node->node); - return true; + if ((new_node = copy_propagation_compute_replacement(var_def, offset, type->dimx, &swizzle))) + { + TRACE("Load from %s[%u-%u] propagated as instruction %p%s.\n", + var->name, offset, offset + type->dimx, new_node, debug_hlsl_swizzle(swizzle, 4)); + if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc))) + return false; + list_add_before(&node->entry, &swizzle_node->node.entry); + replace_node(node, &swizzle_node->node); + return true; + } + + TRACE("No single source for propagating load from %s[%u-%u].\n", var->name, offset, offset + type->dimx); + return false; }
static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
On 12/17/21 03:22, Giovanni Mascellani wrote:
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
A new var_def is now initialized to the content of the same var_def in earlier stack frames.
I'm not a big fan of this, because now it feels like it's half one thing and half the other.
...
+static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
struct hlsl_block *block)
+{
- struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
- {
switch (instr->type)
{
case HLSL_IR_STORE:
{
struct hlsl_ir_store *store = hlsl_ir_store(instr);
struct copy_propagation_var_def *var_def;
struct hlsl_deref *lhs = &store->lhs;
struct hlsl_ir_var *var = lhs->var;
unsigned int offset;
if (!(var_def = copy_propagation_get_var_def(state, var)))
continue;
if (hlsl_offset_from_deref(lhs, &offset))
copy_propagation_invalidate_variable_partial(state, var_def, offset, store->writemask);
else
copy_propagation_invalidate_whole_variable(state, var_def);
break;
}
case HLSL_IR_IF:
{
struct hlsl_ir_if *iff = hlsl_ir_if(instr);
copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs);
copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs);
break;
}
case HLSL_IR_LOOP:
{
struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
copy_propagation_invalidate_from_block(ctx, state, &loop->body);
break;
}
default:
break;
}
- }
+}
Something that just occurred to me: could we handle this by instead just comparing the var_def structures from the parent and child state, and invalidating where they don't match?
Alternatively, probably simpler: could we just recursively invalidate parents in copy_propagation_record_store()?
Hi,
On 17/12/21 18:59, Zebediah Figura (she/her) wrote:
A new var_def is now initialized to the content of the same var_def in earlier stack frames.
I'm not a big fan of this, because now it feels like it's half one thing and half the other.
Neither I like it (my preferred solution is still my original proposal), but it was necessary to fix Matteo's changes, which hopefully you don't like either (since you found they are buggy). For the pars construens, which solution would you prefer now? Matteo, which alternative do you prefer?
Something that just occurred to me: could we handle this by instead just comparing the var_def structures from the parent and child state, and invalidating where they don't match?
Alternatively, probably simpler: could we just recursively invalidate parents in copy_propagation_record_store()?
I guess we could.
I don't like it because it makes it harder for me to convince myself that there is a proof of the correctness of the algorithm, and we have just seen that when it is harder to prove the correctness of an algorithm it is also easier that there are actual bugs; and on the other hand I see no points in favor in doing the changes (except perhaps for, in the case of the recursive var_def search, vague speculations about the fact that it might be a little faster.
Do you think that changing this would make it easier for you to check that the code is correct? Which other advantages do you see for the change your are proposing?
Thanks, Giovanni.
On 12/20/21 02:51, Giovanni Mascellani wrote:
Hi,
On 17/12/21 18:59, Zebediah Figura (she/her) wrote:
A new var_def is now initialized to the content of the same var_def in earlier stack frames.
I'm not a big fan of this, because now it feels like it's half one thing and half the other.
Neither I like it (my preferred solution is still my original proposal), but it was necessary to fix Matteo's changes, which hopefully you don't like either (since you found they are buggy). For the pars construens, which solution would you prefer now? Matteo, which alternative do you prefer?
Something that just occurred to me: could we handle this by instead just comparing the var_def structures from the parent and child state, and invalidating where they don't match?
Alternatively, probably simpler: could we just recursively invalidate parents in copy_propagation_record_store()?
I guess we could.
I don't like it because it makes it harder for me to convince myself that there is a proof of the correctness of the algorithm, and we have just seen that when it is harder to prove the correctness of an algorithm it is also easier that there are actual bugs; and on the other hand I see no points in favor in doing the changes (except perhaps for, in the case of the recursive var_def search, vague speculations about the fact that it might be a little faster.
Do you think that changing this would make it easier for you to check that the code is correct? Which other advantages do you see for the change your are proposing?
I'll admit, I still am inclined to prefer not duplicating variable state, and I don't share your concern about code correctness, even though we've been bitten once already.
I suppose in that case the solution would be to store some extra bit of state along with the hlsl_ir_node pointer, say "bool written;".