Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 24b8205c..f8b977b0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -475,6 +475,27 @@ static bool fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi return true; }
+static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_swizzle *swizzle; + unsigned int i; + + if (instr->type != HLSL_IR_SWIZZLE) + return false; + swizzle = hlsl_ir_swizzle(instr); + + if (instr->data_type->dimx != swizzle->val.node->data_type->dimx) + return false; + + for (i = 0; i < instr->data_type->dimx; ++i) + if (((swizzle->swizzle >> (2 * i)) & 3) != i) + return false; + + replace_node(instr, swizzle->val.node); + + return true; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -1355,6 +1376,7 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun } while (progress); while (transform_ir(ctx, fold_constants, body, NULL)); + transform_ir(ctx, remove_trivial_swizzles, body, NULL);
if (ctx->profile->major_version < 4) transform_ir(ctx, lower_division, body, NULL);
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index f8b977b0..f5432d22 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -204,7 +204,7 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx struct hlsl_block *block, void *context) { struct hlsl_ir_node *instr, *next; - bool progress = 0; + bool progress = false;
LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) {
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl.h | 6 ++++-- libs/vkd3d-shader/hlsl_codegen.c | 33 +++++++++++++++++++++++--------- libs/vkd3d-shader/hlsl_sm1.c | 4 ++-- libs/vkd3d-shader/hlsl_sm4.c | 8 ++++---- 4 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index eae96232..f2a7a48d 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -751,8 +751,10 @@ unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); unsigned int hlsl_swizzle_from_writemask(unsigned int writemask);
-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref); -struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type); +bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset); +unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); +struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct hlsl_type *type);
bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index f5432d22..a0154e3b 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1286,31 +1286,46 @@ static bool type_is_single_reg(const struct hlsl_type *type) return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; }
-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref) +bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node;
if (!offset_node) - return 0; + { + *offset = 0; + return true; + }
/* We should always have generated a cast to UINT. */ assert(offset_node->data_type->type == HLSL_CLASS_SCALAR && offset_node->data_type->base_type == HLSL_TYPE_UINT);
if (offset_node->type != HLSL_IR_CONSTANT) - { - FIXME("Dereference with non-constant offset of type %s.\n", hlsl_node_type_to_string(offset_node->type)); - return 0; - } + return false;
- return hlsl_ir_constant(offset_node)->value[0].u; + *offset = hlsl_ir_constant(offset_node)->value[0].u; + return true; +} + +unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + unsigned int offset; + + if (hlsl_offset_from_deref(deref, &offset)) + return offset; + + hlsl_fixme(ctx, deref->offset.node->loc, "Dereference with non-constant offset of type %s.", + hlsl_node_type_to_string(deref->offset.node->type)); + + return 0; }
-struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type) +struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct hlsl_type *type) { const struct hlsl_ir_var *var = deref->var; struct hlsl_reg ret = var->reg; - unsigned int offset = hlsl_offset_from_deref(deref); + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
ret.id += offset / 4;
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 875f521f..4ff552bc 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -663,7 +663,7 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); - const struct hlsl_reg reg = hlsl_reg_from_deref(&load->src, instr->data_type); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src, instr->data_type); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, @@ -707,7 +707,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * { const struct hlsl_ir_store *store = hlsl_ir_store(instr); const struct hlsl_ir_node *rhs = store->rhs.node; - const struct hlsl_reg reg = hlsl_reg_from_deref(&store->lhs, rhs->data_type); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs, rhs->data_type); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index e597425a..12ddd4fd 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -792,7 +792,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - unsigned int offset = hlsl_offset_from_deref(deref) + var->buffer_offset; + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
assert(data_type->type <= HLSL_CLASS_VECTOR); reg->type = VKD3D_SM4_RT_CONSTBUFFER; @@ -820,7 +820,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_INPUT; @@ -850,7 +850,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_OUTPUT; @@ -862,7 +862,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_TEMP;
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++- 1 file changed, 253 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index a0154e3b..b5c43754 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -237,6 +237,253 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) hlsl_free_instr(old); }
+/* The copy propagation pass scans the code trying to reconstruct, for + * each load, which is the store that last wrote to that + * variable. When this happens, the load can be replaced with the node + * from which the variable was stored. + * + * In order to do that, the pass keeps a map of all the variables it + * has already seen; for each variable, a pointer to a node and a + * component index is kept for each of the registers that belong to + * the variable. This means that, at that point of the scan, that + * register was last stored to from that component of that node. The + * pointer can be NULL if information for that register could not be + * gathered statically (either because a non-constant offset is used, + * or because control flow forces us to drop information). + * + * When scanning through a store, data for the stored-to variable is + * updated. When scanning through a load, it is checked if all the + * registers involved in the load come from a single node. In such + * case, the store can be replaced with a swizzle based on that + * node. */ + +struct copy_propagation_value +{ + struct hlsl_ir_node *node; + unsigned int component; +}; + +struct copy_propagation_variable +{ + struct rb_entry entry; + struct hlsl_ir_var *var; + struct copy_propagation_value *values; +}; + +struct copy_propagation_state +{ + struct rb_tree variables; +}; + +static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) +{ + struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)variable->var; + + return (key_int > entry_int) - (key_int < entry_int); +} + +static void copy_propagation_variable_destroy(struct rb_entry *entry, void *context) +{ + struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + + vkd3d_free(variable); +} + +static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state, + struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->variables, var); + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + else + return NULL; +} + +static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx, + struct copy_propagation_state *state, struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->variables, var); + struct copy_propagation_variable *variable; + int res; + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + + variable = hlsl_alloc(ctx, sizeof(*variable)); + if (!variable) + return NULL; + + variable->var = var; + variable->values = hlsl_alloc(ctx, sizeof(*variable->values) * var->data_type->reg_size); + if (!variable->values) + { + vkd3d_free(variable); + return NULL; + } + + res = rb_put(&state->variables, var, &variable->entry); + assert(!res); + + return variable; +} + +static void copy_propagation_invalidate_whole_variable(struct copy_propagation_variable *variable) +{ + TRACE("Invalidate variable %s.\n", variable->var->name); + memset(variable->values, 0, sizeof(*variable->values) * variable->var->data_type->reg_size); +} + +static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset, + unsigned char writemask, struct hlsl_ir_node *node) +{ + static const char swizzle_letters[] = {'x', 'y', 'z', 'w'}; + + unsigned int i; + + for (i = 0; i < 4; ++i) + { + if (writemask & (1u << i)) + { + TRACE("Variable %s[%d] is written by instruction %p.%c.\n", + variable->var->name, offset + i, node, swizzle_letters[i]); + variable->values[offset + i].node = node; + variable->values[offset + i].component = i; + } + } +} + +static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable, + unsigned int offset, unsigned int count, unsigned int *swizzle) +{ + struct hlsl_ir_node *node = NULL; + unsigned int i; + + assert(offset + count <= variable->var->data_type->reg_size); + + *swizzle = 0; + + for (i = 0; i < count; ++i) + { + if (!node) + node = variable->values[offset + i].node; + else if (node != variable->values[offset + i].node) + return NULL; + *swizzle |= variable->values[offset + i].component << (2 * i); + } + + return node; +} + +static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, + struct copy_propagation_state *state) +{ + static const char swizzle_letters[] = {'x', 'y', 'z', 'w'}; + + struct hlsl_ir_node *node = &load->node, *new_node; + struct copy_propagation_variable *variable; + struct hlsl_type *type = node->data_type; + unsigned int offset, swizzle; + struct hlsl_deref *src = &load->src; + struct hlsl_ir_var *var = src->var; + struct hlsl_ir_swizzle *swizzle_node; + + if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + return false; + + if (!hlsl_offset_from_deref(src, &offset)) + return false; + + variable = copy_propagation_get_variable(state, var); + if (!variable) + return false; + + new_node = copy_propagation_find_replacement(variable, offset, type->dimx, &swizzle); + + TRACE("Load from %s[%d-%d] reconstructed as instruction %p.%c%c%c%c.\n", + var->name, offset, offset + type->dimx, new_node, + swizzle_letters[swizzle % 4], swizzle_letters[(swizzle >> 2) % 4], + swizzle_letters[(swizzle >> 4) % 4], swizzle_letters[(swizzle >> 6) % 4]); + + if (!new_node) + return false; + + if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc))) + return false; + list_add_before(&node->entry, &swizzle_node->node.entry); + + replace_node(node, &swizzle_node->node); + + return true; +} + +static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + struct copy_propagation_state *state) +{ + struct copy_propagation_variable *variable; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + variable = copy_propagation_create_variable(ctx, state, var); + if (!variable) + return; + + if (hlsl_offset_from_deref(lhs, &offset)) + copy_propagation_set_value(variable, offset, store->writemask, store->rhs.node); + else + copy_propagation_invalidate_whole_variable(variable); +} + +static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state) +{ + struct hlsl_ir_node *instr, *next; + bool progress = false; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOAD: + progress |= copy_propagation_analyze_load(ctx, hlsl_ir_load(instr), state); + break; + + case HLSL_IR_STORE: + copy_propagation_record_store(ctx, hlsl_ir_store(instr), state); + break; + + case HLSL_IR_IF: + FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); + return progress; + + case HLSL_IR_LOOP: + FIXME("Copy propagation doesn't support loops yet, leaving.\n"); + return progress; + + default: + break; + } + } + + return progress; +} + +static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + struct copy_propagation_state state; + bool progress; + + rb_init(&state.variables, copy_propagation_variable_compare); + + progress = copy_propagation_transform_block(ctx, block, &state); + + rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL); + + return progress; +} + static bool is_vec1(const struct hlsl_type *type) { return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); @@ -1390,7 +1637,12 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun progress |= transform_ir(ctx, split_struct_copies, body, NULL); } while (progress); - while (transform_ir(ctx, fold_constants, body, NULL)); + do + { + progress = transform_ir(ctx, fold_constants, body, NULL); + progress |= copy_propagation_execute(ctx, body); + } + while (progress); transform_ir(ctx, remove_trivial_swizzles, body, NULL);
if (ctx->profile->major_version < 4)
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
Hi,
On 15/11/21 15:27, Giovanni Mascellani wrote:
+static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset,
unsigned char writemask, struct hlsl_ir_node *node)
+{
- static const char swizzle_letters[] = {'x', 'y', 'z', 'w'};
- unsigned int i;
- for (i = 0; i < 4; ++i)
- {
if (writemask & (1u << i))
{
TRACE("Variable %s[%d] is written by instruction %p.%c.\n",
variable->var->name, offset + i, node, swizzle_letters[i]);
variable->values[offset + i].node = node;
variable->values[offset + i].component = i;
}
- }
+}
After playing with the code a little more, I don't think that's correct any more. What we call the "writemask" in struct hlsl_ir_store seems more a "write-swizzle", right? The correct code would be:
j = 0; for (i = 0; i < 4; ++i) { if (writemask & (1u << i)) { ... variable->values[offset + i].component = j++; } }
In other words, if we are storing with a writemask .y, we mean that the first register of the source goes in the second register of the destination, not that the second register of the source goes in the second register of the destination. Otherwise the code generated by the constructor float4(1.0, 2.0, 3.0, 4.0) would be wrong:
trace:hlsl_dump_function: 2: float | 1.00000000e+00 trace:hlsl_dump_function: 3: float | 2.00000000e+00 trace:hlsl_dump_function: 4: float | 3.00000000e+00 trace:hlsl_dump_function: 5: float | 4.00000000e+00 trace:hlsl_dump_function: 6: | = (<constructor-0>.x @2) trace:hlsl_dump_function: 7: | = (<constructor-0>.y @3) trace:hlsl_dump_function: 8: | = (<constructor-0>.z @4) trace:hlsl_dump_function: 9: | = (<constructor-0>.w @5) trace:hlsl_dump_function: 10: float4 | <constructor-0>
Does any of this make sense?
Giovanni.
On 11/16/21 3:04 AM, Giovanni Mascellani wrote:
Hi,
On 15/11/21 15:27, Giovanni Mascellani wrote:
+static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset,
unsigned char writemask, struct hlsl_ir_node *node)
+{
- static const char swizzle_letters[] = {'x', 'y', 'z', 'w'};
- unsigned int i;
- for (i = 0; i < 4; ++i)
- {
if (writemask & (1u << i))
{
TRACE("Variable %s[%d] is written by instruction %p.%c.\n",
variable->var->name, offset + i, node, swizzle_letters[i]);
variable->values[offset + i].node = node;
variable->values[offset + i].component = i;
}
- }
+}
After playing with the code a little more, I don't think that's correct any more. What we call the "writemask" in struct hlsl_ir_store seems more a "write-swizzle", right? The correct code would be:
j = 0; for (i = 0; i < 4; ++i) { if (writemask & (1u << i)) { ... variable->values[offset + i].component = j++; } }
In other words, if we are storing with a writemask .y, we mean that the first register of the source goes in the second register of the destination, not that the second register of the source goes in the second register of the destination. Otherwise the code generated by the constructor float4(1.0, 2.0, 3.0, 4.0) would be wrong:
trace:hlsl_dump_function: 2: float | 1.00000000e+00 trace:hlsl_dump_function: 3: float | 2.00000000e+00 trace:hlsl_dump_function: 4: float | 3.00000000e+00 trace:hlsl_dump_function: 5: float | 4.00000000e+00 trace:hlsl_dump_function: 6: | = (<constructor-0>.x @2) trace:hlsl_dump_function: 7: | = (<constructor-0>.y @3) trace:hlsl_dump_function: 8: | = (<constructor-0>.z @4) trace:hlsl_dump_function: 9: | = (<constructor-0>.w @5) trace:hlsl_dump_function: 10: float4 | <constructor-0>
Does any of this make sense?
Giovanni.
Yes, I believe that is correct. I'm surprised it didn't end up breaking any tests...
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 157 +++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index b5c43754..0ecd8348 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -255,7 +255,18 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) * updated. When scanning through a load, it is checked if all the * registers involved in the load come from a single node. In such * case, the store can be replaced with a swizzle based on that - * node. */ + * node. + * + * All of the above works when we disregard control flow. With control + * flow it becames slightly more complicated: instead of a single map + * we keep a stack of them, pushing a new entry each time we enter an + * embedded block, and popping the entry when leaving the block. + * + * When entering a conditional block, both branches ("then" and + * "else") can inherit the variable state available just before the + * conditional block. After the conditional block, all variables that + * might have been written in either branch must be invalidated, + * because we don't know which branch has executed. */
struct copy_propagation_value { @@ -272,7 +283,9 @@ struct copy_propagation_variable
struct copy_propagation_state { - struct rb_tree variables; + struct rb_tree *variables; + unsigned int depth; + unsigned int capacity; };
static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) @@ -293,7 +306,7 @@ static void copy_propagation_variable_destroy(struct rb_entry *entry, void *cont static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state, struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->variables, var); + struct rb_entry *entry = rb_get(&state->variables[state->depth], var);
if (entry) return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); @@ -304,7 +317,7 @@ static struct copy_propagation_variable *copy_propagation_get_variable(struct co static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->variables, var); + struct rb_entry *entry = rb_get(&state->variables[state->depth], var); struct copy_propagation_variable *variable; int res;
@@ -323,7 +336,7 @@ static struct copy_propagation_variable *copy_propagation_create_variable(struct return NULL; }
- res = rb_put(&state->variables, var, &variable->entry); + res = rb_put(&state->variables[state->depth], var, &variable->entry); assert(!res);
return variable; @@ -354,6 +367,99 @@ static void copy_propagation_set_value(struct copy_propagation_variable *variabl } }
+static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, + struct hlsl_block *block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); + struct copy_propagation_variable *variable; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + variable = copy_propagation_get_variable(state, var); + if (!variable) + continue; + + if (hlsl_offset_from_deref(lhs, &offset)) + copy_propagation_set_value(variable, offset, store->writemask, NULL); + else + copy_propagation_invalidate_whole_variable(variable); + + break; + } + + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + + break; + } + + default: + break; + } + } +} + +static void copy_propagation_pop(struct copy_propagation_state *state) +{ + assert(state->depth > 0); + rb_destroy(&state->variables[state->depth], copy_propagation_variable_destroy, NULL); + --state->depth; +} + +static bool copy_propagation_duplicate(struct hlsl_ctx *ctx, struct copy_propagation_state *state) +{ + struct copy_propagation_variable *var; + + if (state->depth + 1 == state->capacity) + { + unsigned int new_capacity = 2 * state->capacity; + struct rb_tree *new_vars; + + new_vars = hlsl_realloc(ctx, state->variables, sizeof(*state->variables) * new_capacity); + if (!new_vars) + return false; + state->capacity = new_capacity; + state->variables = new_vars; + } + ++state->depth; + + rb_init(&state->variables[state->depth], copy_propagation_variable_compare); + + RB_FOR_EACH_ENTRY(var, &state->variables[state->depth - 1], struct copy_propagation_variable, entry) + { + struct copy_propagation_variable *new_var = copy_propagation_create_variable(ctx, state, var->var); + + if (!new_var) + return false; + + memcpy(new_var->values, var->values, sizeof(*var->values) * var->var->data_type->reg_size); + } + + return true; +} + static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable, unsigned int offset, unsigned int count, unsigned int *swizzle) { @@ -436,6 +542,34 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s copy_propagation_invalidate_whole_variable(variable); }
+static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state); + +static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, + struct copy_propagation_state *state) +{ + bool progress = false; + + if (!copy_propagation_duplicate(ctx, state)) + goto end; + + progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, state); + + copy_propagation_pop(state); + if (!copy_propagation_duplicate(ctx, state)) + goto end; + + progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, state); + + copy_propagation_pop(state); + +end: + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + return progress; +} + static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct copy_propagation_state *state) { @@ -455,7 +589,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b break;
case HLSL_IR_IF: - FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); + progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state); return progress;
case HLSL_IR_LOOP: @@ -475,11 +609,18 @@ static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bl struct copy_propagation_state state; bool progress;
- rb_init(&state.variables, copy_propagation_variable_compare); + state.depth = 0; + state.capacity = 1; + state.variables = hlsl_alloc(ctx, sizeof(*state.variables) * state.capacity); + if (!state.variables) + return false; + rb_init(&state.variables[state.depth], copy_propagation_variable_compare);
progress = copy_propagation_transform_block(ctx, block, &state);
- rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL); + assert(state.depth == 0); + rb_destroy(&state.variables[state.depth], copy_propagation_variable_destroy, NULL); + vkd3d_free(state.variables);
return progress; }
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 0ecd8348..53108788 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -266,7 +266,15 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) * "else") can inherit the variable state available just before the * conditional block. After the conditional block, all variables that * might have been written in either branch must be invalidated, - * because we don't know which branch has executed. */ + * because we don't know which branch has executed. + * + * When entering a loop block, we can inherit the variable state + * available just before the loop block, except that all variables + * that are written in the body must be invalidated (because at the + * beginning of each execution of the body we don't know whether the + * body has already executed or not). The same is valid after the loop + * block, because we don't know whether the body has executed at all + * or not. */
struct copy_propagation_value { @@ -570,6 +578,22 @@ end: return progress; }
+static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, + struct copy_propagation_state *state) +{ + bool progress = false; + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + if (!copy_propagation_duplicate(ctx, state)) + return progress; + + progress |= copy_propagation_transform_block(ctx, &loop->body, state); + + copy_propagation_pop(state); + + return progress; +} + static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct copy_propagation_state *state) { @@ -593,7 +617,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b return progress;
case HLSL_IR_LOOP: - FIXME("Copy propagation doesn't support loops yet, leaving.\n"); + progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state); return progress;
default: