Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 24b8205c..f8b977b0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -475,6 +475,27 @@ static bool fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi return true; }
+static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_swizzle *swizzle; + unsigned int i; + + if (instr->type != HLSL_IR_SWIZZLE) + return false; + swizzle = hlsl_ir_swizzle(instr); + + if (instr->data_type->dimx != swizzle->val.node->data_type->dimx) + return false; + + for (i = 0; i < instr->data_type->dimx; ++i) + if (((swizzle->swizzle >> (2 * i)) & 3) != i) + return false; + + replace_node(instr, swizzle->val.node); + + return true; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -1355,6 +1376,7 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun } while (progress); while (transform_ir(ctx, fold_constants, body, NULL)); + transform_ir(ctx, remove_trivial_swizzles, body, NULL);
if (ctx->profile->major_version < 4) transform_ir(ctx, lower_division, body, NULL);
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index f8b977b0..f5432d22 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -204,7 +204,7 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx struct hlsl_block *block, void *context) { struct hlsl_ir_node *instr, *next; - bool progress = 0; + bool progress = false;
LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) {
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- libs/vkd3d-shader/hlsl.h | 6 ++++-- libs/vkd3d-shader/hlsl_codegen.c | 33 +++++++++++++++++++++++--------- libs/vkd3d-shader/hlsl_sm1.c | 4 ++-- libs/vkd3d-shader/hlsl_sm4.c | 8 ++++---- 4 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index eae96232..f2a7a48d 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -751,8 +751,10 @@ unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); unsigned int hlsl_swizzle_from_writemask(unsigned int writemask);
-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref); -struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type); +bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset); +unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); +struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct hlsl_type *type);
bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index f5432d22..a0154e3b 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1286,31 +1286,46 @@ static bool type_is_single_reg(const struct hlsl_type *type) return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; }
-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref) +bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node;
if (!offset_node) - return 0; + { + *offset = 0; + return true; + }
/* We should always have generated a cast to UINT. */ assert(offset_node->data_type->type == HLSL_CLASS_SCALAR && offset_node->data_type->base_type == HLSL_TYPE_UINT);
if (offset_node->type != HLSL_IR_CONSTANT) - { - FIXME("Dereference with non-constant offset of type %s.\n", hlsl_node_type_to_string(offset_node->type)); - return 0; - } + return false;
- return hlsl_ir_constant(offset_node)->value[0].u; + *offset = hlsl_ir_constant(offset_node)->value[0].u; + return true; +} + +unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + unsigned int offset; + + if (hlsl_offset_from_deref(deref, &offset)) + return offset; + + hlsl_fixme(ctx, deref->offset.node->loc, "Dereference with non-constant offset of type %s.", + hlsl_node_type_to_string(deref->offset.node->type)); + + return 0; }
-struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type) +struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct hlsl_type *type) { const struct hlsl_ir_var *var = deref->var; struct hlsl_reg ret = var->reg; - unsigned int offset = hlsl_offset_from_deref(deref); + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
ret.id += offset / 4;
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 875f521f..4ff552bc 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -663,7 +663,7 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); - const struct hlsl_reg reg = hlsl_reg_from_deref(&load->src, instr->data_type); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src, instr->data_type); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, @@ -707,7 +707,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * { const struct hlsl_ir_store *store = hlsl_ir_store(instr); const struct hlsl_ir_node *rhs = store->rhs.node; - const struct hlsl_reg reg = hlsl_reg_from_deref(&store->lhs, rhs->data_type); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs, rhs->data_type); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index e597425a..12ddd4fd 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -792,7 +792,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - unsigned int offset = hlsl_offset_from_deref(deref) + var->buffer_offset; + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
assert(data_type->type <= HLSL_CLASS_VECTOR); reg->type = VKD3D_SM4_RT_CONSTBUFFER; @@ -820,7 +820,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_INPUT; @@ -850,7 +850,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_OUTPUT; @@ -862,7 +862,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_TEMP;
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- v6: See https://source.winehq.org/patches/data/219588
libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++- 1 file changed, 253 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index a0154e3b..42422220 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -237,6 +237,253 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) hlsl_free_instr(old); }
+/* The copy propagation pass scans the code trying to reconstruct, for + * each load, which is the store that last wrote to that + * variable. When this happens, the load can be replaced with the node + * from which the variable was stored. + * + * In order to do that, the pass keeps a map of all the variables it + * has already seen; for each variable, a pointer to a node and a + * component index is kept for each of the registers that belong to + * the variable. This means that, at that point of the scan, that + * register was last stored to from that component of that node. The + * pointer can be NULL if information for that register could not be + * gathered statically (either because a non-constant offset is used, + * or because control flow forces us to drop information). + * + * When scanning through a store, data for the stored-to variable is + * updated. When scanning through a load, it is checked if all the + * registers involved in the load come from a single node. In such + * case, the load can be replaced with a swizzle based on that + * node. 
*/ + +struct copy_propagation_value +{ + struct hlsl_ir_node *node; + unsigned int component; +}; + +struct copy_propagation_variable +{ + struct rb_entry entry; + struct hlsl_ir_var *var; + struct copy_propagation_value *values; +}; + +struct copy_propagation_state +{ + struct rb_tree variables; +}; + +static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) +{ + struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)variable->var; + + return (key_int > entry_int) - (key_int < entry_int); +} + +static void copy_propagation_variable_destroy(struct rb_entry *entry, void *context) +{ + struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + + vkd3d_free(variable); +} + +static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state, + struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->variables, var); + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + else + return NULL; +} + +static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx, + struct copy_propagation_state *state, struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->variables, var); + struct copy_propagation_variable *variable; + int res; + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + + variable = hlsl_alloc(ctx, sizeof(*variable)); + if (!variable) + return NULL; + + variable->var = var; + variable->values = hlsl_alloc(ctx, sizeof(*variable->values) * var->data_type->reg_size); + if (!variable->values) + { + vkd3d_free(variable); + return NULL; + } + + res = rb_put(&state->variables, var, &variable->entry); + assert(!res); + + return variable; +} + +static void 
copy_propagation_invalidate_whole_variable(struct copy_propagation_variable *variable) +{ + TRACE("Invalidate variable %s.\n", variable->var->name); + memset(variable->values, 0, sizeof(*variable->values) * variable->var->data_type->reg_size); +} + +static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset, + unsigned char writemask, struct hlsl_ir_node *node) +{ + static const char swizzle_letters[] = {'x', 'y', 'z', 'w'}; + + unsigned int i, j = 0; + + for (i = 0; i < 4; ++i) + { + if (writemask & (1u << i)) + { + TRACE("Variable %s[%d] is written by instruction %p.%c.\n", + variable->var->name, offset + i, node, swizzle_letters[i]); + variable->values[offset + i].node = node; + variable->values[offset + i].component = j++; + } + } +} + +static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable, + unsigned int offset, unsigned int count, unsigned int *swizzle) +{ + struct hlsl_ir_node *node = NULL; + unsigned int i; + + assert(offset + count <= variable->var->data_type->reg_size); + + *swizzle = 0; + + for (i = 0; i < count; ++i) + { + if (!node) + node = variable->values[offset + i].node; + else if (node != variable->values[offset + i].node) + return NULL; + *swizzle |= variable->values[offset + i].component << (2 * i); + } + + return node; +} + +static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, + struct copy_propagation_state *state) +{ + static const char swizzle_letters[] = {'x', 'y', 'z', 'w'}; + + struct hlsl_ir_node *node = &load->node, *new_node; + struct copy_propagation_variable *variable; + struct hlsl_type *type = node->data_type; + unsigned int offset, swizzle; + struct hlsl_deref *src = &load->src; + struct hlsl_ir_var *var = src->var; + struct hlsl_ir_swizzle *swizzle_node; + + if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + return false; + + if (!hlsl_offset_from_deref(src, &offset)) + 
return false; + + variable = copy_propagation_get_variable(state, var); + if (!variable) + return false; + + new_node = copy_propagation_find_replacement(variable, offset, type->dimx, &swizzle); + + TRACE("Load from %s[%d-%d] reconstructed as instruction %p.%c%c%c%c.\n", + var->name, offset, offset + type->dimx, new_node, + swizzle_letters[swizzle % 4], swizzle_letters[(swizzle >> 2) % 4], + swizzle_letters[(swizzle >> 4) % 4], swizzle_letters[(swizzle >> 6) % 4]); + + if (!new_node) + return false; + + if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc))) + return false; + list_add_before(&node->entry, &swizzle_node->node.entry); + + replace_node(node, &swizzle_node->node); + + return true; +} + +static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + struct copy_propagation_state *state) +{ + struct copy_propagation_variable *variable; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + variable = copy_propagation_create_variable(ctx, state, var); + if (!variable) + return; + + if (hlsl_offset_from_deref(lhs, &offset)) + copy_propagation_set_value(variable, offset, store->writemask, store->rhs.node); + else + copy_propagation_invalidate_whole_variable(variable); +} + +static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state) +{ + struct hlsl_ir_node *instr, *next; + bool progress = false; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOAD: + progress |= copy_propagation_analyze_load(ctx, hlsl_ir_load(instr), state); + break; + + case HLSL_IR_STORE: + copy_propagation_record_store(ctx, hlsl_ir_store(instr), state); + break; + + case HLSL_IR_IF: + FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); + return progress; + + case HLSL_IR_LOOP: + FIXME("Copy 
propagation doesn't support loops yet, leaving.\n"); + return progress; + + default: + break; + } + } + + return progress; +} + +static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + struct copy_propagation_state state; + bool progress; + + rb_init(&state.variables, copy_propagation_variable_compare); + + progress = copy_propagation_transform_block(ctx, block, &state); + + rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL); + + return progress; +} + static bool is_vec1(const struct hlsl_type *type) { return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); @@ -1390,7 +1637,12 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun progress |= transform_ir(ctx, split_struct_copies, body, NULL); } while (progress); - while (transform_ir(ctx, fold_constants, body, NULL)); + do + { + progress = transform_ir(ctx, fold_constants, body, NULL); + progress |= copy_propagation_execute(ctx, body); + } + while (progress); transform_ir(ctx, remove_trivial_swizzles, body, NULL);
if (ctx->profile->major_version < 4)
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 157 +++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 42422220..775fec19 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -255,7 +255,18 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) * updated. When scanning through a load, it is checked if all the * registers involved in the load come from a single node. In such * case, the store can be replaced with a swizzle based on that - * node. */ + * node. + * + * All of the above works when we disregard control flow. With control + * flow it becomes slightly more complicated: instead of a single map + * we keep a stack of them, pushing a new entry each time we enter an + * embedded block, and popping the entry when leaving the block. + * + * When entering a conditional block, both branches ("then" and + * "else") can inherit the variable state available just before the + * conditional block. After the conditional block, all variables that + * might have been written in either branch must be invalidated, + * because we don't know which branch has executed. */
struct copy_propagation_value { @@ -272,7 +283,9 @@ struct copy_propagation_variable
struct copy_propagation_state { - struct rb_tree variables; + struct rb_tree *variables; + unsigned int depth; + unsigned int capacity; };
static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) @@ -293,7 +306,7 @@ static void copy_propagation_variable_destroy(struct rb_entry *entry, void *cont static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state, struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->variables, var); + struct rb_entry *entry = rb_get(&state->variables[state->depth], var);
if (entry) return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); @@ -304,7 +317,7 @@ static struct copy_propagation_variable *copy_propagation_get_variable(struct co static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->variables, var); + struct rb_entry *entry = rb_get(&state->variables[state->depth], var); struct copy_propagation_variable *variable; int res;
@@ -323,7 +336,7 @@ static struct copy_propagation_variable *copy_propagation_create_variable(struct return NULL; }
- res = rb_put(&state->variables, var, &variable->entry); + res = rb_put(&state->variables[state->depth], var, &variable->entry); assert(!res);
return variable; @@ -354,6 +367,99 @@ static void copy_propagation_set_value(struct copy_propagation_variable *variabl } }
+static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, + struct hlsl_block *block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); + struct copy_propagation_variable *variable; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + variable = copy_propagation_get_variable(state, var); + if (!variable) + continue; + + if (hlsl_offset_from_deref(lhs, &offset)) + copy_propagation_set_value(variable, offset, store->writemask, NULL); + else + copy_propagation_invalidate_whole_variable(variable); + + break; + } + + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + + break; + } + + default: + break; + } + } +} + +static void copy_propagation_pop(struct copy_propagation_state *state) +{ + assert(state->depth > 0); + rb_destroy(&state->variables[state->depth], copy_propagation_variable_destroy, NULL); + --state->depth; +} + +static bool copy_propagation_duplicate(struct hlsl_ctx *ctx, struct copy_propagation_state *state) +{ + struct copy_propagation_variable *var; + + if (state->depth + 1 == state->capacity) + { + unsigned int new_capacity = 2 * state->capacity; + struct rb_tree *new_vars; + + new_vars = hlsl_realloc(ctx, state->variables, sizeof(*state->variables) * new_capacity); + if (!new_vars) + return false; + state->capacity = new_capacity; + state->variables = new_vars; + } + ++state->depth; + + rb_init(&state->variables[state->depth], 
copy_propagation_variable_compare); + + RB_FOR_EACH_ENTRY(var, &state->variables[state->depth - 1], struct copy_propagation_variable, entry) + { + struct copy_propagation_variable *new_var = copy_propagation_create_variable(ctx, state, var->var); + + if (!new_var) + return false; + + memcpy(new_var->values, var->values, sizeof(*var->values) * var->var->data_type->reg_size); + } + + return true; +} + static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable, unsigned int offset, unsigned int count, unsigned int *swizzle) { @@ -436,6 +542,34 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s copy_propagation_invalidate_whole_variable(variable); }
+static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state); + +static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, + struct copy_propagation_state *state) +{ + bool progress = false; + + if (!copy_propagation_duplicate(ctx, state)) + goto end; + + progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, state); + + copy_propagation_pop(state); + if (!copy_propagation_duplicate(ctx, state)) + goto end; + + progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, state); + + copy_propagation_pop(state); + +end: + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + return progress; +} + static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct copy_propagation_state *state) { @@ -455,7 +589,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b break;
case HLSL_IR_IF: - FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); + progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state); return progress;
case HLSL_IR_LOOP: @@ -475,11 +609,18 @@ static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bl struct copy_propagation_state state; bool progress;
- rb_init(&state.variables, copy_propagation_variable_compare); + state.depth = 0; + state.capacity = 1; + state.variables = hlsl_alloc(ctx, sizeof(*state.variables) * state.capacity); + if (!state.variables) + return false; + rb_init(&state.variables[state.depth], copy_propagation_variable_compare);
progress = copy_propagation_transform_block(ctx, block, &state);
- rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL); + assert(state.depth == 0); + rb_destroy(&state.variables[state.depth], copy_propagation_variable_destroy, NULL); + vkd3d_free(state.variables);
return progress; }
On 11/16/21 13:00, Giovanni Mascellani wrote:
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
libs/vkd3d-shader/hlsl_codegen.c | 157 +++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 8 deletions(-)
I'd been putting off these two patches because I thought they would be harder and I wanted to get the basic parts in first, but I had a look at them anyway and they are actually pretty simple. So, nice job :-)
I do have some comments, however...
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 42422220..775fec19 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -255,7 +255,18 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
- updated. When scanning through a load, it is checked if all the
- registers involved in the load come from a single node. In such
- case, the store can be replaced with a swizzle based on that
- node. */
- node.
- All of the above works when we disregard control flow. With control
- flow it becames slightly more complicated: instead of a single map
- we keep a stack of them, pushing a new entry each time we enter an
- embedded block, and popping the entry when leaving the block.
Typo, 'becames'.
Also, this is nitpicky, but the first sentence is a little weird when the earlier comment contains the phrase 'because control flow forces us to drop information'.
- When entering a conditional block, both branches ("then" and
- "else") can inherit the variable state available just before the
- conditional block. After the conditional block, all variables that
- might have been written in either branch must be invalidated,
- because we don't know which branch has executed. */
struct copy_propagation_value {
@@ -272,7 +283,9 @@ struct copy_propagation_variable
struct copy_propagation_state {
- struct rb_tree variables;
- struct rb_tree *variables;
- unsigned int depth;
- unsigned int capacity; };
As an alternative, you could create a new copy_propagation_state struct on stack in copy_propagation_process_if().
static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) @@ -293,7 +306,7 @@ static void copy_propagation_variable_destroy(struct rb_entry *entry, void *cont static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state, struct hlsl_ir_var *var) {
- struct rb_entry *entry = rb_get(&state->variables, var);
struct rb_entry *entry = rb_get(&state->variables[state->depth], var);
if (entry) return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
@@ -304,7 +317,7 @@ static struct copy_propagation_variable *copy_propagation_get_variable(struct co static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) {
- struct rb_entry *entry = rb_get(&state->variables, var);
- struct rb_entry *entry = rb_get(&state->variables[state->depth], var); struct copy_propagation_variable *variable; int res;
@@ -323,7 +336,7 @@ static struct copy_propagation_variable *copy_propagation_create_variable(struct return NULL; }
- res = rb_put(&state->variables, var, &variable->entry);
res = rb_put(&state->variables[state->depth], var, &variable->entry); assert(!res);
return variable;
@@ -354,6 +367,99 @@ static void copy_propagation_set_value(struct copy_propagation_variable *variabl } }
+static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
struct hlsl_block *block)
+{
- struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
- {
switch (instr->type)
{
case HLSL_IR_STORE:
{
struct hlsl_ir_store *store = hlsl_ir_store(instr);
struct copy_propagation_variable *variable;
struct hlsl_deref *lhs = &store->lhs;
struct hlsl_ir_var *var = lhs->var;
unsigned int offset;
variable = copy_propagation_get_variable(state, var);
if (!variable)
continue;
if (hlsl_offset_from_deref(lhs, &offset))
copy_propagation_set_value(variable, offset, store->writemask, NULL);
else
copy_propagation_invalidate_whole_variable(variable);
break;
}
case HLSL_IR_IF:
{
struct hlsl_ir_if *iff = hlsl_ir_if(instr);
copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs);
copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs);
break;
}
case HLSL_IR_LOOP:
{
struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
copy_propagation_invalidate_from_block(ctx, state, &loop->body);
break;
}
default:
break;
}
- }
+}
+static void copy_propagation_pop(struct copy_propagation_state *state) +{
- assert(state->depth > 0);
- rb_destroy(&state->variables[state->depth], copy_propagation_variable_destroy, NULL);
- --state->depth;
+}
+static bool copy_propagation_duplicate(struct hlsl_ctx *ctx, struct copy_propagation_state *state) +{
- struct copy_propagation_variable *var;
- if (state->depth + 1 == state->capacity)
- {
unsigned int new_capacity = 2 * state->capacity;
struct rb_tree *new_vars;
new_vars = hlsl_realloc(ctx, state->variables, sizeof(*state->variables) * new_capacity);
if (!new_vars)
return false;
state->capacity = new_capacity;
state->variables = new_vars;
- }
This looks like an open-coded hlsl_array_reserve(); any reason not to use that?
- ++state->depth;
- rb_init(&state->variables[state->depth], copy_propagation_variable_compare);
- RB_FOR_EACH_ENTRY(var, &state->variables[state->depth - 1], struct copy_propagation_variable, entry)
- {
struct copy_propagation_variable *new_var = copy_propagation_create_variable(ctx, state, var->var);
if (!new_var)
return false;
memcpy(new_var->values, var->values, sizeof(*var->values) * var->var->data_type->reg_size);
- }
- return true;
+}
One potential alternative that occurred to me: instead of duplicating the whole state, we can search backwards in each parent scope in copy_propagation_get_variable(). Notably, in the case of a loop, we'd hit NULL before a "real" variable, and return NULL.
Assuming that works it'd reduce the amount of allocation we have to do.
Hi,
On 17/11/21 05:41, Zebediah Figura (she/her) wrote:
On 11/16/21 13:00, Giovanni Mascellani wrote:
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
libs/vkd3d-shader/hlsl_codegen.c | 157 +++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 8 deletions(-)
I'd been putting off these two patches because I thought they would be harder and I wanted to get the basic parts in first, but I had a look at them anyway and they are actually pretty simple. So, nice job :-)
Good, thanks for the review! :-)
@@ -272,7 +283,9 @@ struct copy_propagation_variable struct copy_propagation_state { - struct rb_tree variables; + struct rb_tree *variables; + unsigned int depth; + unsigned int capacity; };
As an alternative, you could create a new copy_propagation_state struct on stack in copy_propagation_process_if().
I guess I could, but I don't like this very much: it would mean that I have to pass more pointers between calls, which makes the code more convoluted and refactoring more difficult. The advantage of putting all the state in a single structure is that you just pass a pointer to that structure, and if in the future you want to extend that, you minimize the impact on internal interfaces.
Also, it's easier (or so I think) for someone who reads the code for the first time to understand what's going on, because all the state is listed in a single place, and you don't need to understand all the function call graph before you can start making sense of what is being stored.
One potential alternative that occurred to me: instead of duplicating the whole state, we can search backwards in each parent scope in copy_propagation_get_variable(). Notably, in the case of a loop, we'd hit NULL before a "real" variable, and return NULL.
Assuming that works it'd reduce the amount of allocation we have to do.
I think it would work (I think I considered this approach at some point) and it would reduce the amount of allocation we do, but it would increase the amount of work that programmers have to do to write and maintain the code. Depending on the variable usage patterns, it could also increase the running time of the program. For these two reasons, mostly the first one, I don't think it is a good idea until stronger data emerges.
The other comments will be addressed in v7.
Giovanni.
On 11/17/21 02:45, Giovanni Mascellani wrote:
@@ -272,7 +283,9 @@ struct copy_propagation_variable struct copy_propagation_state { - struct rb_tree variables; + struct rb_tree *variables; + unsigned int depth; + unsigned int capacity; };
As an alternative, you could create a new copy_propagation_state struct on stack in copy_propagation_process_if().
I guess I could, but I don't like this very much: it would mean that I have to pass more pointers between calls, which makes the code more convoluted and refactoring more difficult. The advantage of putting all the state in a single structure is that you just pass a pointer to that structure, and if in the future you want to extend that, you minimize the impact on internal interfaces.
Also, it's easier (or so I think) for someone who reads the code for the first time to understand what's going on, because all the state is listed in a single place, and you don't need to understand all the function call graph before you can start making sense of what is being stored.
I don't think you even need to add any function parameters, do you? Instead you do something like:
struct copy_propagation_state { struct rbtree variables; // if we want to avoid duplicating the contents: const struct copy_propagation_state *parent; };
static bool copy_propagation_handle_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, struct copy_propagation_state *parent) { struct copy_propagation_state state;
rb_init(&state.variables);
// duplicate parent->variables into state.variables... // or, alternatively: state.parent = parent; }
Same number of parameters as we currently have.
I'm not strongly attached to this solution, but I will say that coming from a probably neutral perspective, it doesn't seem any more (or less?) complex than what you have.
One potential alternative that occurred to me: instead of duplicating the whole state, we can search backwards in each parent scope in copy_propagation_get_variable(). Notably, in the case of a loop, we'd hit NULL before a "real" variable, and return NULL.
Assuming that works it'd reduce the amount of allocation we have to do.
I think it would work (I think I considered this approach at some point) and it would reduce the amount of allocation we do, but it would increase the amount of work that programmers have to do to write and maintain the code. Depending on the variable usage patterns, it could also increase the running time of the program. For these two reasons, mostly the first one, I don't think it is a good idea until stronger data emerges.
Well, coming from a perspective that hasn't been working with either variation, I'd say that they seem about equally natural to me. I don't strongly prefer recursive search over duplication, though, so I don't really intend to fight over this point.
Wrt CPU patterns, I'd be really surprised if searching parent scopes ends up being more CPU-intensive than duplicating scope contents. You'd have to be accessing each variable enough times to offset the cost of memory allocation.
Hi,
On 18/11/21 05:05, Zebediah Figura (she/her) wrote:
I don't think you even need to add any function parameters, do you? Instead you do something like:
struct copy_propagation_state { struct rbtree variables; // if we want to avoid duplicating the contents: const struct copy_propagation_state *parent; };
static bool copy_propagation_handle_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, struct copy_propagation_state *parent) { struct copy_propagation_state state;
rb_init(&state.variables);
// duplicate parent->variables into state.variables... // or, alternatively: state.parent = parent; }
Same number of parameters as we currently have.
I'm not strongly attached to this solution, but I will say that coming from a probably neutral perspective, it doesn't seem any more (or less?) complex than what you have.
Oh, I see what you mean.
Personally, I still prefer my solution. It feels more idiomatic: you don't have to couple the compiler's stack too tightly to the compiled program's stack, so if you don't have a strong preference I would keep my way.
One potential alternative that occurred to me: instead of duplicating the whole state, we can search backwards in each parent scope in copy_propagation_get_variable(). Notably, in the case of a loop, we'd hit NULL before a "real" variable, and return NULL.
Assuming that works it'd reduce the amount of allocation we have to do.
I think it would work (I think I considered this approach at some point) and it would reduce the amount of allocation we do, but it would increase the amount of work that programmers have to do to write and maintain the code. Depending on the variable usage patterns, it could also increase the running time of the program. For these two reasons, mostly the first one, I don't think it is a good idea until stronger data emerges.
Well, coming from a perspective that hasn't been working with either variation, I'd say that they seem about equally natural to me. I don't strongly prefer recursive search over duplication, though, so I don't really intend to fight over this point.
Here too I'd like to keep my way if you don't have a strong position on this.
Wrt CPU patterns, I'd be really surprised if searching parent scopes ends up being more CPU-intensive than duplicating scope contents. You'd have to be accessing each variable enough times to offset the cost of memory allocation.
On the other hand, each variable access with your solution requires many RB tree lookups, so the balance really depends on the usage patterns.
Giovanni.
On Thu, Nov 18, 2021 at 10:09 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 18/11/21 05:05, Zebediah Figura (she/her) wrote:
I don't think you even need to add any function parameters, do you? Instead you do something like:
struct copy_propagation_state { struct rbtree variables; // if we want to avoid duplicating the contents: const struct copy_propagation_state *parent; };
static bool copy_propagation_handle_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, struct copy_propagation_state *parent) { struct copy_propagation_state state;
rb_init(&state.variables); // duplicate parent->variables into state.variables... // or, alternatively: state.parent = parent;
}
Same number of parameters as we currently have.
I'm not strongly attached to this solution, but I will say that coming from a probably neutral perspective, it doesn't seem any more (or less?) complex than what you have.
Oh, I see what you mean.
Personally, I still prefer my solution. It feels more idiomatic: you don't have to couple the compiler's stack too tightly to the compiled program's stack, so if you don't have a strong preference I would keep my way.
I don't know if that coupling you mention is necessarily a bad thing. BTW, can anybody confirm that normally our stack should be able to grow as needed?
One potential alternative that occurred to me: instead of duplicating the whole state, we can search backwards in each parent scope in copy_propagation_get_variable(). Notably, in the case of a loop, we'd hit NULL before a "real" variable, and return NULL.
Assuming that works it'd reduce the amount of allocation we have to do.
I think it would work (I think I considered this approach at some point) and it would reduce the amount of allocation we do, but it would increase the amount of work that programmers have to do to write and maintain the code. Depending on the variable usage patterns, it could also increase the running time of the program. For these two reasons, mostly the first one, I don't think it is a good idea until stronger data emerges.
Well, coming from a perspective that hasn't been working with either variation, I'd say that they seem about equally natural to me. I don't strongly prefer recursive search over duplication, though, so I don't really intend to fight over this point.
Here too I'd like to keep my way if you don't have a strong position on this.
Wrt CPU patterns, I'd be really surprised if searching parent scopes ends up being more CPU-intensive than duplicating scope contents. You'd have to be accessing each variable enough times to offset the cost of memory allocation.
On the other hand, each variable access with your solution requires many RB tree lookups, so the balance really depends on the usage patterns.
For better or worse, I thought the same things as Zebediah WRT this patch. So I went ahead and reworked the patch along those lines. Let me know if you don't like it / hate it.
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 775fec19..ba48a1f7 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -266,7 +266,15 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) * "else") can inherit the variable state available just before the * conditional block. After the conditional block, all variables that * might have been written in either branch must be invalidated, - * because we don't know which branch has executed. */ + * because we don't know which branch has executed. + * + * When entering a loop block, we can inherit the variable state + * available just before the loop block, except that all variables + * that are written in the body must be invalidated (because at the + * beginning of each execution of the body we don't know whether the + * body has already executed or not). The same is valid after the loop + * block, because we don't know whether the body has executed at all + * or not. */
struct copy_propagation_value { @@ -570,6 +578,22 @@ end: return progress; }
+static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, + struct copy_propagation_state *state) +{ + bool progress = false; + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + if (!copy_propagation_duplicate(ctx, state)) + return progress; + + progress |= copy_propagation_transform_block(ctx, &loop->body, state); + + copy_propagation_pop(state); + + return progress; +} + static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct copy_propagation_state *state) { @@ -593,7 +617,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b return progress;
case HLSL_IR_LOOP: - FIXME("Copy propagation doesn't support loops yet, leaving.\n"); + progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state); return progress;
default: