Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- A new var_def is now initialized to the content of the same var_def in earlier stack frames.
I still believe this is more complicated than it should be (i.e., harder to check and therefore more prone to bugs), but maybe it's correct anyway.
Zebediah said she doesn't like it in this way, but I couldn't understand in which direction I should change it to make it better; since my last submission has been evicted from the patch tracker, I am sending again without changes. --- libs/vkd3d-shader/hlsl_codegen.c | 148 +++++++++++++++++++++++++++---- 1 file changed, 133 insertions(+), 15 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 75716bdf..d09a6a3b 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -290,6 +290,7 @@ struct copy_propagation_var_def struct copy_propagation_state { struct rb_tree var_defs; + struct copy_propagation_state *parent; };
static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) @@ -308,9 +309,12 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte }
static struct copy_propagation_var_def *copy_propagation_get_var_def(struct copy_propagation_state *state, - struct hlsl_ir_var *var) + const struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->var_defs, var); + struct rb_entry *entry; + + while (state && !(entry = rb_get(&state->var_defs, var))) + state = state->parent;
if (entry) return RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); @@ -321,8 +325,8 @@ static struct copy_propagation_var_def *copy_propagation_get_var_def(struct copy static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { + struct copy_propagation_var_def *var_def, *prev_var_def; struct rb_entry *entry = rb_get(&state->var_defs, var); - struct copy_propagation_var_def *var_def; int res;
if (entry) @@ -331,6 +335,10 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h if (!(var_def = hlsl_alloc(ctx, offsetof(struct copy_propagation_var_def, values[var->data_type->reg_size])))) return NULL;
+ prev_var_def = copy_propagation_get_var_def(state, var); + if (prev_var_def) + memcpy(var_def->values, prev_var_def->values, var->data_type->reg_size * sizeof(*var_def->values)); + var_def->var = var;
res = rb_put(&state->var_defs, var, &var_def->entry); @@ -339,12 +347,6 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h return var_def; }
-static void copy_propagation_invalidate_whole_variable(struct copy_propagation_var_def *var_def) -{ - TRACE("Invalidate variable %s.\n", var_def->var->name); - memset(var_def->values, 0, sizeof(*var_def->values) * var_def->var->data_type->reg_size); -} - static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, unsigned int offset, unsigned char writemask, struct hlsl_ir_node *node) { @@ -354,7 +356,7 @@ static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, { if (writemask & (1u << i)) { - TRACE("Variable %s[%u] is written by instruction %p%s.\n", + TRACE("Variable %s[%u] updated with value from %p%s.\n", var_def->var->name, offset + i, node, debug_hlsl_writemask(1u << i)); var_def->values[offset + i].node = node; var_def->values[offset + i].component = j++; @@ -362,6 +364,99 @@ static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, } }
+static void copy_propagation_invalidate_whole_variable(struct copy_propagation_state *state, + struct copy_propagation_var_def *var_def) +{ + const struct hlsl_ir_var *var = var_def->var; + + TRACE("Invalidate variable %s.\n", var->name); + while (state) + { + if ((var_def = copy_propagation_get_var_def(state, var))) + memset(var_def->values, 0, sizeof(*var_def->values) * var_def->var->data_type->reg_size); + state = state->parent; + } +} + +static void copy_propagation_invalidate_variable_partial(struct copy_propagation_state *state, + struct copy_propagation_var_def *var_def, unsigned int offset, unsigned char writemask) +{ + const struct hlsl_ir_var *var = var_def->var; + + TRACE("Invalidate variable %s[%u]%s.\n", var->name, offset, debug_hlsl_writemask(writemask)); + while (state) + { + if ((var_def = copy_propagation_get_var_def(state, var))) + copy_propagation_set_value(var_def, offset, writemask, NULL); + state = state->parent; + } +} + +static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, + struct hlsl_block *block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); + struct copy_propagation_var_def *var_def; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + if (!(var_def = copy_propagation_get_var_def(state, var))) + continue; + + if (hlsl_offset_from_deref(lhs, &offset)) + copy_propagation_invalidate_variable_partial(state, var_def, offset, store->writemask); + else + copy_propagation_invalidate_whole_variable(state, var_def); + + break; + } + + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + + break; + } + + default: + break; + } + } +} + +static void copy_propagation_state_destroy(struct copy_propagation_state *state) +{ + rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL); +} + +static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct copy_propagation_state *state, + struct copy_propagation_state *parent) +{ + rb_init(&state->var_defs, copy_propagation_var_def_compare); + state->parent = parent; +} + static struct hlsl_ir_node *copy_propagation_compute_replacement(struct copy_propagation_var_def *var_def, unsigned int offset, unsigned int count, unsigned int *swizzle) { @@ -433,7 +528,30 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s if (hlsl_offset_from_deref(lhs, &offset)) copy_propagation_set_value(var_def, offset, store->writemask, store->rhs.node); else - copy_propagation_invalidate_whole_variable(var_def); + copy_propagation_invalidate_whole_variable(state, var_def); +} + +static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state); + +static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, + struct copy_propagation_state *state) +{ + struct copy_propagation_state inner_state; + bool progress = false; + + copy_propagation_state_init(ctx, &inner_state, state); + progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, &inner_state); + copy_propagation_state_destroy(&inner_state); + + copy_propagation_state_init(ctx, &inner_state, state); + progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, &inner_state); + copy_propagation_state_destroy(&inner_state); + + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + return progress; }
static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -455,8 +573,8 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b break;
case HLSL_IR_IF: - FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); - return progress; + progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state); + break;
case HLSL_IR_LOOP: FIXME("Copy propagation doesn't support loops yet, leaving.\n"); @@ -475,11 +593,11 @@ static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bl struct copy_propagation_state state; bool progress;
- rb_init(&state.var_defs, copy_propagation_var_def_compare); + copy_propagation_state_init(ctx, &state, NULL);
progress = copy_propagation_transform_block(ctx, block, &state);
- rb_destroy(&state.var_defs, copy_propagation_var_def_destroy, NULL); + copy_propagation_state_destroy(&state);
return progress; }
Signed-off-by: Matteo Bruni mbruni@codeweavers.com Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index d09a6a3b..e358bb14 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -554,6 +554,21 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if return progress; }
+static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, + struct copy_propagation_state *state) +{ + struct copy_propagation_state inner_state; + bool progress = false; + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + + copy_propagation_state_init(ctx, &inner_state, state); + progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state); + copy_propagation_state_destroy(&inner_state); + + return progress; +} + static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct copy_propagation_state *state) { @@ -577,8 +592,8 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b break;
case HLSL_IR_LOOP: - FIXME("Copy propagation doesn't support loops yet, leaving.\n"); - return progress; + progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state); + break;
default: break;
Signed-off-by: Matteo Bruni mbruni@codeweavers.com Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- Control flow reworked so that copy_propagation_analyze_load() is easier to read. --- libs/vkd3d-shader/hlsl_codegen.c | 71 +++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 10 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index e358bb14..882a546e 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -479,6 +479,48 @@ static struct hlsl_ir_node *copy_propagation_compute_replacement(struct copy_pro return node; }
+static struct hlsl_ir_node *copy_propagation_compute_constant(struct hlsl_ctx *ctx, + struct copy_propagation_var_def *variable, unsigned int offset, unsigned int count, + const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = HLSL_TYPE_VOID; + struct hlsl_ir_constant *constant = NULL; + unsigned int i; + + assert(offset + count <= variable->var->data_type->reg_size); + + for (i = 0; i < count; ++i) + { + struct hlsl_ir_node *store_node = variable->values[offset + i].node; + enum hlsl_base_type store_type; + + if (!store_node) + return NULL; + if (store_node->type != HLSL_IR_CONSTANT) + return NULL; + + store_type = store_node->data_type->base_type; + + if (type == HLSL_TYPE_VOID) + type = store_type; + else if (type != store_type) + return NULL; + } + + if (!(constant = hlsl_new_uint_constant(ctx, 0, *loc))) + return NULL; + constant->node.data_type = hlsl_get_vector_type(ctx, type, count); + + for (i = 0; i < count; ++i) + { + struct hlsl_ir_node *store_node = variable->values[offset + i].node; + + constant->value[i] = hlsl_ir_constant(store_node)->value[variable->values[offset + i].component]; + } + + return &constant->node; +} + static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, struct copy_propagation_state *state) { @@ -499,19 +541,28 @@ static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_l if (!(var_def = copy_propagation_get_var_def(state, var))) return false;
- if (!(new_node = copy_propagation_compute_replacement(var_def, offset, type->dimx, &swizzle))) + if ((new_node = copy_propagation_compute_constant(ctx, var_def, offset, type->dimx, &load->node.loc))) { - TRACE("No single source for propagating load from %s[%u-%u].\n", var->name, offset, offset + type->dimx); - return false; + TRACE("Load from %s[%d-%d] reconstructed as constant value.\n", + var->name, offset, offset + type->dimx); + list_add_before(&node->entry, &new_node->entry); + replace_node(node, new_node); + return true; }
- TRACE("Load from %s[%u-%u] propagated as instruction %p%s.\n", - var->name, offset, offset + type->dimx, new_node, debug_hlsl_swizzle(swizzle, 4)); - if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc))) - return false; - list_add_before(&node->entry, &swizzle_node->node.entry); - replace_node(node, &swizzle_node->node); - return true; + if ((new_node = copy_propagation_compute_replacement(var_def, offset, type->dimx, &swizzle))) + { + TRACE("Load from %s[%u-%u] propagated as instruction %p%s.\n", + var->name, offset, offset + type->dimx, new_node, debug_hlsl_swizzle(swizzle, 4)); + if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc))) + return false; + list_add_before(&node->entry, &swizzle_node->node.entry); + replace_node(node, &swizzle_node->node); + return true; + } + + TRACE("No single source for propagating load from %s[%u-%u].\n", var->name, offset, offset + type->dimx); + return false; }
static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
On 1/19/22 06:53, Giovanni Mascellani wrote:
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
A new var_def is now initialized to the content of the same var_def in earlier stack frames.
I still believe this is more complicated than it should be (i.e., harder to check and therefore more prone to bugs), but maybe it's correct anyway.
Zebediah said she doesn't like it in this way, but I couldn't understand in which direction I should change it to make it better; since my last submission has been evicted from the patch tracker, I am sending again without changes.
As pointed out, the whole point of searching parent scopes (whether recursively or with an array) in copy_propagation_get_var_def() is to avoid duplicating the state. This patch feels like half of one thing and half of the other.
Since we need a way to distinguish "written in this scope, and can be resolved to a single instr" vs "written in this scope, and cannot be resolved" vs "not written in this scope", that sounds to me like we need to store an extra bool of information along with the hlsl_ir_node pointer.