Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
---
 libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 24b8205c..f8b977b0 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -475,6 +475,27 @@ static bool fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi
     return true;
 }

+static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+    struct hlsl_ir_swizzle *swizzle;
+    unsigned int i;
+
+    if (instr->type != HLSL_IR_SWIZZLE)
+        return false;
+    swizzle = hlsl_ir_swizzle(instr);
+
+    if (instr->data_type->dimx != swizzle->val.node->data_type->dimx)
+        return false;
+
+    for (i = 0; i < instr->data_type->dimx; ++i)
+        if (((swizzle->swizzle >> (2 * i)) & 3) != i)
+            return false;
+
+    replace_node(instr, swizzle->val.node);
+
+    return true;
+}
+
 /* Lower DIV to RCP + MUL. */
 static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 {
@@ -1355,6 +1376,7 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
     }
     while (progress);
     while (transform_ir(ctx, fold_constants, body, NULL));
+    transform_ir(ctx, remove_trivial_swizzles, body, NULL);

     if (ctx->profile->major_version < 4)
         transform_ir(ctx, lower_division, body, NULL);
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
---
 libs/vkd3d-shader/hlsl_codegen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index f8b977b0..f5432d22 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -204,7 +204,7 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx
         struct hlsl_block *block, void *context)
 {
     struct hlsl_ir_node *instr, *next;
-    bool progress = 0;
+    bool progress = false;

     LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
     {
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
---
 libs/vkd3d-shader/hlsl.h         |  6 ++++--
 libs/vkd3d-shader/hlsl_codegen.c | 33 +++++++++++++++++++++++---------
 libs/vkd3d-shader/hlsl_sm1.c     |  4 ++--
 libs/vkd3d-shader/hlsl_sm4.c     |  8 ++++----
 4 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index eae96232..f2a7a48d 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -751,8 +751,10 @@ unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second);
 unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask);
 unsigned int hlsl_swizzle_from_writemask(unsigned int writemask);

-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref);
-struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type);
+bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset);
+unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
+struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
+        const struct hlsl_type *type);

 bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
         bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg);
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index f5432d22..a0154e3b 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -1286,31 +1286,46 @@ static bool type_is_single_reg(const struct hlsl_type *type)
     return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR;
 }

-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref)
+bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset)
 {
     struct hlsl_ir_node *offset_node = deref->offset.node;

     if (!offset_node)
-        return 0;
+    {
+        *offset = 0;
+        return true;
+    }

     /* We should always have generated a cast to UINT. */
     assert(offset_node->data_type->type == HLSL_CLASS_SCALAR
             && offset_node->data_type->base_type == HLSL_TYPE_UINT);

     if (offset_node->type != HLSL_IR_CONSTANT)
-    {
-        FIXME("Dereference with non-constant offset of type %s.\n", hlsl_node_type_to_string(offset_node->type));
-        return 0;
-    }
+        return false;

-    return hlsl_ir_constant(offset_node)->value[0].u;
+    *offset = hlsl_ir_constant(offset_node)->value[0].u;
+    return true;
+}
+
+unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
+{
+    unsigned int offset;
+
+    if (hlsl_offset_from_deref(deref, &offset))
+        return offset;
+
+    hlsl_fixme(ctx, deref->offset.node->loc, "Dereference with non-constant offset of type %s.",
+            hlsl_node_type_to_string(deref->offset.node->type));
+
+    return 0;
 }

-struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type)
+struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
+        const struct hlsl_type *type)
 {
     const struct hlsl_ir_var *var = deref->var;
     struct hlsl_reg ret = var->reg;
-    unsigned int offset = hlsl_offset_from_deref(deref);
+    unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);

     ret.id += offset / 4;

diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c
index 875f521f..4ff552bc 100644
--- a/libs/vkd3d-shader/hlsl_sm1.c
+++ b/libs/vkd3d-shader/hlsl_sm1.c
@@ -663,7 +663,7 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
 static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
 {
     const struct hlsl_ir_load *load = hlsl_ir_load(instr);
-    const struct hlsl_reg reg = hlsl_reg_from_deref(&load->src, instr->data_type);
+    const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src, instr->data_type);
     struct sm1_instruction sm1_instr =
     {
         .opcode = D3DSIO_MOV,
@@ -707,7 +707,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *
 {
     const struct hlsl_ir_store *store = hlsl_ir_store(instr);
     const struct hlsl_ir_node *rhs = store->rhs.node;
-    const struct hlsl_reg reg = hlsl_reg_from_deref(&store->lhs, rhs->data_type);
+    const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs, rhs->data_type);
     struct sm1_instruction sm1_instr =
     {
         .opcode = D3DSIO_MOV,
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c
index e597425a..12ddd4fd 100644
--- a/libs/vkd3d-shader/hlsl_sm4.c
+++ b/libs/vkd3d-shader/hlsl_sm4.c
@@ -792,7 +792,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
     }
     else
     {
-        unsigned int offset = hlsl_offset_from_deref(deref) + var->buffer_offset;
+        unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;

         assert(data_type->type <= HLSL_CLASS_VECTOR);
         reg->type = VKD3D_SM4_RT_CONSTBUFFER;
@@ -820,7 +820,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
         }
         else
         {
-            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type);
+            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);

             assert(hlsl_reg.allocated);
             reg->type = VKD3D_SM4_RT_INPUT;
@@ -850,7 +850,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
         }
         else
         {
-            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type);
+            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);

             assert(hlsl_reg.allocated);
             reg->type = VKD3D_SM4_RT_OUTPUT;
@@ -862,7 +862,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
     }
     else
     {
-        struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type);
+        struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);

         assert(hlsl_reg.allocated);
         reg->type = VKD3D_SM4_RT_TEMP;
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
On Wed, Nov 17, 2021 at 9:47 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com Signed-off-by: Zebediah Figura zfigura@codeweavers.com
 libs/vkd3d-shader/hlsl.h         |  6 ++++--
 libs/vkd3d-shader/hlsl_codegen.c | 33 +++++++++++++++++++++++---------
 libs/vkd3d-shader/hlsl_sm1.c     |  4 ++--
 libs/vkd3d-shader/hlsl_sm4.c     |  8 ++++----
 4 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index f5432d22..a0154e3b 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -1286,31 +1286,46 @@ static bool type_is_single_reg(const struct hlsl_type *type)
     return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR;
 }
-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref)
+bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset)
 {
     struct hlsl_ir_node *offset_node = deref->offset.node;

     if (!offset_node)
-        return 0;
+    {
+        *offset = 0;
+        return true;
+    }

     /* We should always have generated a cast to UINT. */
     assert(offset_node->data_type->type == HLSL_CLASS_SCALAR
             && offset_node->data_type->base_type == HLSL_TYPE_UINT);

     if (offset_node->type != HLSL_IR_CONSTANT)
-    {
-        FIXME("Dereference with non-constant offset of type %s.\n", hlsl_node_type_to_string(offset_node->type));
-        return 0;
-    }
+        return false;

-    return hlsl_ir_constant(offset_node)->value[0].u;
+    *offset = hlsl_ir_constant(offset_node)->value[0].u;
+    return true;
+}
Mostly unrelated to the patch, a possible option would be to rename the function to hlsl_constant_offset_from_deref(), or something to that effect, and introduce a separate function for generic / non-constant offset. I guess it depends on what will make more sense once we get to handling dynamic offsets.
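For illustration, the rename might end up looking something like the prototypes below; the second one is purely hypothetical and only marks where a variant for dynamic offsets could eventually go:

/* Hypothetical prototypes, not part of this series. */
bool hlsl_constant_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset);
struct hlsl_ir_node *hlsl_dynamic_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);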
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
---
 libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++-
 1 file changed, 253 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index a0154e3b..42422220 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -237,6 +237,253 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
     hlsl_free_instr(old);
 }

+/* The copy propagation pass scans the code trying to reconstruct, for
+ * each load, which is the store that last wrote to that
+ * variable. When this happens, the load can be replaced with the node
+ * from which the variable was stored.
+ *
+ * In order to do that, the pass keeps a map of all the variables it
+ * has already seen; for each variable, a pointer to a node and a
+ * component index is kept for each of the registers that belong to
+ * the variable. This means that, at that point of the scan, that
+ * register was last stored to from that component of that node. The
+ * pointer can be NULL if information for that register could not be
+ * gathered statically (either because a non-constant offset is used,
+ * or because control flow forces us to drop information).
+ *
+ * When scanning through a store, data for the stored-to variable is
+ * updated. When scanning through a load, it is checked if all the
+ * registers involved in the load come from a single node. In such
+ * case, the store can be replaced with a swizzle based on that
+ * node. */
+
+struct copy_propagation_value
+{
+    struct hlsl_ir_node *node;
+    unsigned int component;
+};
+
+struct copy_propagation_variable
+{
+    struct rb_entry entry;
+    struct hlsl_ir_var *var;
+    struct copy_propagation_value *values;
+};
+
+struct copy_propagation_state
+{
+    struct rb_tree variables;
+};
+
+static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry)
+{
+    struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
+    uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)variable->var;
+
+    return (key_int > entry_int) - (key_int < entry_int);
+}
+
+static void copy_propagation_variable_destroy(struct rb_entry *entry, void *context)
+{
+    struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
+
+    vkd3d_free(variable);
+}
+
+static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state,
+        struct hlsl_ir_var *var)
+{
+    struct rb_entry *entry = rb_get(&state->variables, var);
+
+    if (entry)
+        return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
+    else
+        return NULL;
+}
+
+static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx,
+        struct copy_propagation_state *state, struct hlsl_ir_var *var)
+{
+    struct rb_entry *entry = rb_get(&state->variables, var);
+    struct copy_propagation_variable *variable;
+    int res;
+
+    if (entry)
+        return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
+
+    variable = hlsl_alloc(ctx, sizeof(*variable));
+    if (!variable)
+        return NULL;
+
+    variable->var = var;
+    variable->values = hlsl_alloc(ctx, sizeof(*variable->values) * var->data_type->reg_size);
+    if (!variable->values)
+    {
+        vkd3d_free(variable);
+        return NULL;
+    }
+
+    res = rb_put(&state->variables, var, &variable->entry);
+    assert(!res);
+
+    return variable;
+}
+
+static void copy_propagation_invalidate_whole_variable(struct copy_propagation_variable *variable)
+{
+    TRACE("Invalidate variable %s.\n", variable->var->name);
+    memset(variable->values, 0, sizeof(*variable->values) * variable->var->data_type->reg_size);
+}
+
+static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset,
+        unsigned char writemask, struct hlsl_ir_node *node)
+{
+    static const char swizzle_letters[] = {'x', 'y', 'z', 'w'};
+
+    unsigned int i, j = 0;
+
+    for (i = 0; i < 4; ++i)
+    {
+        if (writemask & (1u << i))
+        {
+            TRACE("Variable %s[%d] is written by instruction %p.%c.\n",
+                    variable->var->name, offset + i, node, swizzle_letters[i]);
+            variable->values[offset + i].node = node;
+            variable->values[offset + i].component = j++;
+        }
+    }
+}
+
+static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
+        unsigned int offset, unsigned int count, unsigned int *swizzle)
+{
+    struct hlsl_ir_node *node = NULL;
+    unsigned int i;
+
+    assert(offset + count <= variable->var->data_type->reg_size);
+
+    *swizzle = 0;
+
+    for (i = 0; i < count; ++i)
+    {
+        if (!node)
+            node = variable->values[offset + i].node;
+        else if (node != variable->values[offset + i].node)
+            return NULL;
+        *swizzle |= variable->values[offset + i].component << (2 * i);
+    }
+
+    return node;
+}
+
+static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load,
+        struct copy_propagation_state *state)
+{
+    static const char swizzle_letters[] = {'x', 'y', 'z', 'w'};
+
+    struct hlsl_ir_node *node = &load->node, *new_node;
+    struct copy_propagation_variable *variable;
+    struct hlsl_type *type = node->data_type;
+    unsigned int offset, swizzle;
+    struct hlsl_deref *src = &load->src;
+    struct hlsl_ir_var *var = src->var;
+    struct hlsl_ir_swizzle *swizzle_node;
+
+    if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR)
+        return false;
+
+    if (!hlsl_offset_from_deref(src, &offset))
+        return false;
+
+    variable = copy_propagation_get_variable(state, var);
+    if (!variable)
+        return false;
+
+    new_node = copy_propagation_find_replacement(variable, offset, type->dimx, &swizzle);
+
+    TRACE("Load from %s[%d-%d] reconstructed as instruction %p.%c%c%c%c.\n",
+            var->name, offset, offset + type->dimx, new_node,
+            swizzle_letters[swizzle % 4], swizzle_letters[(swizzle >> 2) % 4],
+            swizzle_letters[(swizzle >> 4) % 4], swizzle_letters[(swizzle >> 6) % 4]);

+    if (!new_node)
+        return false;
+
+    if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc)))
+        return false;
+    list_add_before(&node->entry, &swizzle_node->node.entry);
+
+    replace_node(node, &swizzle_node->node);
+
+    return true;
+}
+
+static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
+        struct copy_propagation_state *state)
+{
+    struct copy_propagation_variable *variable;
+    struct hlsl_deref *lhs = &store->lhs;
+    struct hlsl_ir_var *var = lhs->var;
+    unsigned int offset;
+
+    variable = copy_propagation_create_variable(ctx, state, var);
+    if (!variable)
+        return;
+
+    if (hlsl_offset_from_deref(lhs, &offset))
+        copy_propagation_set_value(variable, offset, store->writemask, store->rhs.node);
+    else
+        copy_propagation_invalidate_whole_variable(variable);
+}
+
+static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
+        struct copy_propagation_state *state)
+{
+    struct hlsl_ir_node *instr, *next;
+    bool progress = false;
+
+    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
+    {
+        switch (instr->type)
+        {
+            case HLSL_IR_LOAD:
+                progress |= copy_propagation_analyze_load(ctx, hlsl_ir_load(instr), state);
+                break;
+
+            case HLSL_IR_STORE:
+                copy_propagation_record_store(ctx, hlsl_ir_store(instr), state);
+                break;
+
+            case HLSL_IR_IF:
+                FIXME("Copy propagation doesn't support conditionals yet, leaving.\n");
+                return progress;
+
+            case HLSL_IR_LOOP:
+                FIXME("Copy propagation doesn't support loops yet, leaving.\n");
+                return progress;
+
+            default:
+                break;
+        }
+    }
+
+    return progress;
+}
+
+static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block)
+{
+    struct copy_propagation_state state;
+    bool progress;
+
+    rb_init(&state.variables, copy_propagation_variable_compare);
+
+    progress = copy_propagation_transform_block(ctx, block, &state);
+
+    rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL);
+
+    return progress;
+}
+
 static bool is_vec1(const struct hlsl_type *type)
 {
     return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1);
@@ -1390,7 +1637,12 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
         progress |= transform_ir(ctx, split_struct_copies, body, NULL);
     }
     while (progress);
-    while (transform_ir(ctx, fold_constants, body, NULL));
+    do
+    {
+        progress = transform_ir(ctx, fold_constants, body, NULL);
+        progress |= copy_propagation_execute(ctx, body);
+    }
+    while (progress);
     transform_ir(ctx, remove_trivial_swizzles, body, NULL);
if (ctx->profile->major_version < 4)
On Wed, Nov 17, 2021 at 9:47 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com Signed-off-by: Zebediah Figura zfigura@codeweavers.com
 libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++-
 1 file changed, 253 insertions(+), 1 deletion(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index a0154e3b..42422220 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -237,6 +237,253 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
     hlsl_free_instr(old);
 }
+/* The copy propagation pass scans the code trying to reconstruct, for
+ * each load, which is the store that last wrote to that
+ * variable. When this happens, the load can be replaced with the node
+ * from which the variable was stored.
+ *
+ * In order to do that, the pass keeps a map of all the variables it
+ * has already seen; for each variable, a pointer to a node and a
+ * component index is kept for each of the registers that belong to
+ * the variable. This means that, at that point of the scan, that
+ * register was last stored to from that component of that node. The
+ * pointer can be NULL if information for that register could not be
+ * gathered statically (either because a non-constant offset is used,
+ * or because control flow forces us to drop information).
+ *
+ * When scanning through a store, data for the stored-to variable is
+ * updated. When scanning through a load, it is checked if all the
+ * registers involved in the load come from a single node. In such
+ * case, the store can be replaced with a swizzle based on that
+ * node. */
Time to nitpick the comments...
The first paragraph seems a bit hard to read to me. I'm not a native speaker (suggestions welcome!) but I'd rewrite it differently, maybe something like: "The purpose of the copy propagation pass is to get rid of intermediate copies of a variable, replacing each use of a variable with a reference to the instruction that initially defined its current value."
Technically at this point of the compilation process there is no notion of registers so I'd avoid using that word. I think "component" works okay to refer to the individual scalar elements of a variable. You can use "destination variable" when discussing the stored variable.
In general, as already mentioned, getting too much into the details in those "overview" comments doesn't seem useful to me. I'd rather have some specific comment right next to the relevant piece of code, if necessary.
+struct copy_propagation_value
+{
+    struct hlsl_ir_node *node;
+    unsigned int component;
+};
+
+struct copy_propagation_variable
+{
+    struct rb_entry entry;
+    struct hlsl_ir_var *var;
+    struct copy_propagation_value *values;
+};
I still haven't gotten warm to these names. What about copy_propagation_definition (or just copy_propagation_def)?
+struct copy_propagation_state +{
- struct rb_tree variables;
+};
+static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) +{
- struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
- uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)variable->var;
- return (key_int > entry_int) - (key_int < entry_int);
+}
+static void copy_propagation_variable_destroy(struct rb_entry *entry, void *context) +{
- struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
- vkd3d_free(variable);
+}
+static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state,
struct hlsl_ir_var *var)
+{
- struct rb_entry *entry = rb_get(&state->variables, var);
- if (entry)
return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
- else
return NULL;
+}
+static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx,
struct copy_propagation_state *state, struct hlsl_ir_var *var)
+{
- struct rb_entry *entry = rb_get(&state->variables, var);
- struct copy_propagation_variable *variable;
- int res;
- if (entry)
return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
- variable = hlsl_alloc(ctx, sizeof(*variable));
- if (!variable)
return NULL;
- variable->var = var;
- variable->values = hlsl_alloc(ctx, sizeof(*variable->values) * var->data_type->reg_size);
- if (!variable->values)
- {
vkd3d_free(variable);
return NULL;
- }
If we want to go ahead with this dynamic / arbitrary variable size thing (which I still don't like, but I give up), at least let's avoid a separate allocation for the values, i.e.
struct copy_propagation_variable
{
    struct rb_entry entry;
    struct hlsl_ir_var *var;
    struct copy_propagation_value values[];
};

variable = hlsl_alloc(ctx, offsetof(struct copy_propagation_variable, values[var->data_type->reg_size]));
- res = rb_put(&state->variables, var, &variable->entry);
- assert(!res);
- return variable;
+}
+static void copy_propagation_invalidate_whole_variable(struct copy_propagation_variable *variable) +{
- TRACE("Invalidate variable %s.\n", variable->var->name);
- memset(variable->values, 0, sizeof(*variable->values) * variable->var->data_type->reg_size);
+}
+static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset,
+        unsigned char writemask, struct hlsl_ir_node *node)
+{
+    static const char swizzle_letters[] = {'x', 'y', 'z', 'w'};
+
+    unsigned int i, j = 0;
+
+    for (i = 0; i < 4; ++i)
+    {
+        if (writemask & (1u << i))
+        {
+            TRACE("Variable %s[%d] is written by instruction %p.%c.\n",
+                    variable->var->name, offset + i, node, swizzle_letters[i]);
I think you can use debug_hlsl_writemask() here (and if not, probably adapt it).
Is there any case where offset + i can be negative?
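As a concrete illustration of the debug_hlsl_writemask() suggestion above (a sketch only, assuming that helper keeps its usual const char * return and takes the writemask directly), the per-component TRACE could collapse into a single message emitted before the loop:

    /* Hypothetical replacement for the per-component TRACE; not part of the patch. */
    TRACE("Variable %s[%u] is written by instruction %p%s.\n",
            variable->var->name, offset, node, debug_hlsl_writemask(writemask));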
+            variable->values[offset + i].node = node;
+            variable->values[offset + i].component = j++;
+        }
+    }
+}
+static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
+        unsigned int offset, unsigned int count, unsigned int *swizzle)
I think I prefer my original suggestion of _get_ (or something else) rather than _find_; the point being that this function isn't really finding anything.
+{
- struct hlsl_ir_node *node = NULL;
- unsigned int i;
- assert(offset + count <= variable->var->data_type->reg_size);
- *swizzle = 0;
- for (i = 0; i < count; ++i)
- {
if (!node)
node = variable->values[offset + i].node;
else if (node != variable->values[offset + i].node)
return NULL;
*swizzle |= variable->values[offset + i].component << (2 * i);
- }
- return node;
+}
+static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load,
struct copy_propagation_state *state)
+{
- static const char swizzle_letters[] = {'x', 'y', 'z', 'w'};
- struct hlsl_ir_node *node = &load->node, *new_node;
- struct copy_propagation_variable *variable;
- struct hlsl_type *type = node->data_type;
- unsigned int offset, swizzle;
- struct hlsl_deref *src = &load->src;
- struct hlsl_ir_var *var = src->var;
- struct hlsl_ir_swizzle *swizzle_node;
Nitpick, declaration order.
- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR)
return false;
- if (!hlsl_offset_from_deref(src, &offset))
return false;
- variable = copy_propagation_get_variable(state, var);
- if (!variable)
return false;
- new_node = copy_propagation_find_replacement(variable, offset, type->dimx, &swizzle);
+    TRACE("Load from %s[%d-%d] reconstructed as instruction %p.%c%c%c%c.\n",
+            var->name, offset, offset + type->dimx, new_node,
+            swizzle_letters[swizzle % 4], swizzle_letters[(swizzle >> 2) % 4],
+            swizzle_letters[(swizzle >> 4) % 4], swizzle_letters[(swizzle >> 6) % 4]);
Here a debug_hlsl_swizzle() would come in handy. It should be pretty simple to split it out of dump_ir_swizzle().
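To make that concrete, a vector-only helper split out along those lines could look roughly like the sketch below. It assumes vkd3d_dbg_sprintf() is available as elsewhere in vkd3d, and it deliberately ignores the matrix case that dump_ir_swizzle() also handles:

static const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count)
{
    static const char components[] = {'x', 'y', 'z', 'w'};
    char string[5];
    unsigned int i;

    /* Vector swizzles use two bits per component. */
    assert(count <= 4);
    for (i = 0; i < count; ++i)
        string[i] = components[(swizzle >> (2 * i)) & 3];
    string[count] = 0;
    return vkd3d_dbg_sprintf(".%s", string);
}

The TRACE above could then print debug_hlsl_swizzle(swizzle, type->dimx) instead of indexing swizzle_letters four times.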
- if (!new_node)
return false;
- if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc)))
return false;
- list_add_before(&node->entry, &swizzle_node->node.entry);
- replace_node(node, &swizzle_node->node);
- return true;
+}
+static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
struct copy_propagation_state *state)
+{
- struct copy_propagation_variable *variable;
- struct hlsl_deref *lhs = &store->lhs;
- struct hlsl_ir_var *var = lhs->var;
- unsigned int offset;
- variable = copy_propagation_create_variable(ctx, state, var);
- if (!variable)
return;
- if (hlsl_offset_from_deref(lhs, &offset))
copy_propagation_set_value(variable, offset, store->writemask, store->rhs.node);
- else
copy_propagation_invalidate_whole_variable(variable);
+}
+static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct copy_propagation_state *state)
+{
- struct hlsl_ir_node *instr, *next;
- bool progress = false;
- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
- {
switch (instr->type)
{
case HLSL_IR_LOAD:
progress |= copy_propagation_analyze_load(ctx, hlsl_ir_load(instr), state);
break;
case HLSL_IR_STORE:
copy_propagation_record_store(ctx, hlsl_ir_store(instr), state);
break;
case HLSL_IR_IF:
FIXME("Copy propagation doesn't support conditionals yet, leaving.\n");
return progress;
case HLSL_IR_LOOP:
FIXME("Copy propagation doesn't support loops yet, leaving.\n");
return progress;
default:
break;
}
- }
- return progress;
+}
+static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +{
- struct copy_propagation_state state;
- bool progress;
- rb_init(&state.variables, copy_propagation_variable_compare);
- progress = copy_propagation_transform_block(ctx, block, &state);
- rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL);
- return progress;
+}
This and copy_propagation_transform_block() are a bit of a reimplementation of transform_ir(). It should be possible to extend transform_ir() to handle this new pass instead.
I'd introduce a struct like
struct transform_pass
{
    bool (*initialize)(struct hlsl_ctx *hlsl_ctx, void **ctx);
    bool (*process_instruction)(void *ctx, struct hlsl_ir_node *, void *);
    void (*destroy)(void *ctx);
};
and update transform_ir() and the existing passes to make use of it. I imagine we could have some generic initialize() implementation simply doing *ctx = hlsl_ctx; and an empty destroy() implementation to be used when there is no particular context to carry around (like in the existing passes).
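For what it's worth, the generic implementations mentioned there could be as small as the following sketch (the names are hypothetical and only follow the struct transform_pass idea, they don't correspond to anything in the tree):

static bool transform_pass_generic_init(struct hlsl_ctx *hlsl_ctx, void **ctx)
{
    /* Passes without state of their own just carry the compiler context around. */
    *ctx = hlsl_ctx;
    return true;
}

static void transform_pass_generic_destroy(void *ctx)
{
    /* Nothing to release for stateless passes. */
}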
On 11/18/21 11:28, Matteo Bruni wrote:
This and copy_propagation_transform_block() are a bit of a reimplementation of transform_ir(). It should be possible to extend transform_ir() to handle this new pass instead.
I'd introduce a struct like
struct transform_pass { bool (*initialize)(struct hlsl_ctx *hlsl_ctx, void **ctx); bool (*process_instruction)(void *ctx, struct hlsl_ir_node *, void *); void (*destroy)(void *ctx); };
and update transform_ir() and the existing passes to make use of it. I imagine we could have some generic initialize() implementation simply doing *ctx = hlsl_ctx; and an empty destroy() implementation to be used when there is no particular context to carry around (like in the existing passes).
Pretty much the whole point of transform_ir() is that it recursively iterates through CF blocks for cases where you don't care about transforming the CF blocks themselves. That's not the case here. What you're describing sounds to me like a framework that doesn't actually serve any purpose to its use case.
On Thu, Nov 18, 2021 at 6:43 PM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 11/18/21 11:28, Matteo Bruni wrote:
This and copy_propagation_transform_block() are a bit of a reimplementation of transform_ir(). It should be possible to extend transform_ir() to handle this new pass instead.
I'd introduce a struct like
struct transform_pass { bool (*initialize)(struct hlsl_ctx *hlsl_ctx, void **ctx); bool (*process_instruction)(void *ctx, struct hlsl_ir_node *, void *); void (*destroy)(void *ctx); };
and update transform_ir() and the existing passes to make use of it. I imagine we could have some generic initialize() implementation simply doing *ctx = hlsl_ctx; and an empty destroy() implementation to be used when there is no particular context to carry around (like in the existing passes).
Pretty much the whole point of transform_ir() is that it recursively iterates through CF blocks for cases where you don't care about transforming the CF blocks themselves. That's not the case here. What you're describing sounds to me like a framework that doesn't actually serve any purpose to its use case.
Well, transform_ir() could start calling process_instruction() for the HLSL_IR_IF and HLSL_IR_LOOP instructions before recursing, allowing the pass to do what it needs for those, and existing passes would keep ignoring those instructions. Would that not be enough? Or are you saying that transform_ir() should not become more complex?
On 11/18/21 12:18 PM, Matteo Bruni wrote:
On Thu, Nov 18, 2021 at 6:43 PM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 11/18/21 11:28, Matteo Bruni wrote:
This and copy_propagation_transform_block() are a bit of a reimplementation of transform_ir(). It should be possible to extend transform_ir() to handle this new pass instead.
I'd introduce a struct like
struct transform_pass { bool (*initialize)(struct hlsl_ctx *hlsl_ctx, void **ctx); bool (*process_instruction)(void *ctx, struct hlsl_ir_node *, void *); void (*destroy)(void *ctx); };
and update transform_ir() and the existing passes to make use of it. I imagine we could have some generic initialize() implementation simply doing *ctx = hlsl_ctx; and an empty destroy() implementation to be used when there is no particular context to carry around (like in the existing passes).
Pretty much the whole point of transform_ir() is that it recursively iterates through CF blocks for cases where you don't care about transforming the CF blocks themselves. That's not the case here. What you're describing sounds to me like a framework that doesn't actually serve any purpose to its use case.
Well, transform_ir() could start calling process_instruction() for the HLSL_IR_IF and HLSL_IR_LOOP instructions before recursing, allowing the pass to do what it needs for those, and existing passes would keep ignoring those instructions. Would that not be enough? Or are you saying that transform_ir() should not become more complex?
It could, but we also need to do things after the else/loop block, and between the if and else blocks. At that point, as far as I see it, the copy propagation function has to manually handle enough logic that transform_ir() doesn't actually save us any work.
On Thu, Nov 18, 2021 at 7:23 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 11/18/21 12:18 PM, Matteo Bruni wrote:
On Thu, Nov 18, 2021 at 6:43 PM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 11/18/21 11:28, Matteo Bruni wrote:
This and copy_propagation_transform_block() are a bit of a reimplementation of transform_ir(). It should be possible to extend transform_ir() to handle this new pass instead.
I'd introduce a struct like
struct transform_pass { bool (*initialize)(struct hlsl_ctx *hlsl_ctx, void **ctx); bool (*process_instruction)(void *ctx, struct hlsl_ir_node *, void *); void (*destroy)(void *ctx); };
and update transform_ir() and the existing passes to make use of it. I imagine we could have some generic initialize() implementation simply doing *ctx = hlsl_ctx; and an empty destroy() implementation to be used when there is no particular context to carry around (like in the existing passes).
Pretty much the whole point of transform_ir() is that it recursively iterates through CF blocks for cases where you don't care about transforming the CF blocks themselves. That's not the case here. What you're describing sounds to me like a framework that doesn't actually serve any purpose to its use case.
Well, transform_ir() could start calling process_instruction() for the HLSL_IR_IF and HLSL_IR_LOOP instructions before recursing, allowing the pass to do what it needs for those, and existing passes would keep ignoring those instructions. Would that not be enough? Or are you saying that transform_ir() should not become more complex?
It could, but we also need to do things after the else/loop block, and between the if and else blocks. At that point, as far as I see it, the copy propagation function has to manually handle enough logic that transform_ir() doesn't actually save us any work.
Fair enough.
On 11/18/21 11:28, Matteo Bruni wrote:
If we want to go ahead with this dynamic / arbitrary variable size thing (which I still don't like, but I give up), at least let's avoid a separate allocation for the values, i.e.
struct copy_propagation_variable { struct rb_entry entry; struct hlsl_ir_var *var; struct copy_propagation_value values[]; };
variable = hlsl_alloc(ctx, offsetof(struct copy_propagation_variable, values[var->data_type->reg_size]));
FWIW, I think that limiting it to a vector is the right thing to do as well, but I'm also fine with this as a temporary measure, until we can split up vectors.
Hi,
On 18/11/21 18:56, Zebediah Figura (she/her) wrote:
FWIW, I think that limiting it to a vector is the right thing to do as well, but I'm also fine with this as a temporary measure, until we can split up vectors.
Even if there is some variable that for some reason (e.g., because it is accessed with a non-statically-known offset) cannot be split?
Giovanni.
On 11/19/21 9:56 AM, Giovanni Mascellani wrote:
Hi,
On 18/11/21 18:56, Zebediah Figura (she/her) wrote:
FWIW, I think that limiting it to a vector is the right thing to do as well, but I'm also fine with this as a temporary measure, until we can split up vectors.
Even if there is some variable that for some reason (e.g., because it is accessed with a non-statically-known offset) cannot be split?
Giovanni.
My guess is that if there's a variable accessed with non-constant offset, then we're not going to get much out of copy-prop anyway. Granted, you can come up with situations where it's not nothing...
I dunno, maybe it does make sense to leave this alone.
Hi,
On 18/11/21 18:28, Matteo Bruni wrote:
+struct copy_propagation_value +{
- struct hlsl_ir_node *node;
- unsigned int component;
+};
+struct copy_propagation_variable +{
- struct rb_entry entry;
- struct hlsl_ir_var *var;
- struct copy_propagation_value *values;
+};
I still haven't gotten warm to these names. What about copy_propagation_definition (or just copy_propagation_def)?
I don't understand why it's appropriate for this structure, but fine.
If we want to go ahead with this dynamic / arbitrary variable size thing (which I still don't like, but I give up), at least let's avoid a separate allocation for the values, i.e.
struct copy_propagation_variable { struct rb_entry entry; struct hlsl_ir_var *var; struct copy_propagation_value values[]; };
variable = hlsl_alloc(ctx, offsetof(struct copy_propagation_variable, values[var->data_type->reg_size]));
I knew that was coming...
I don't like at all this pattern, but since you gave up on your side I'll do the same on mine.
+static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
unsigned int offset, unsigned int count, unsigned int *swizzle)
I think I prefer my original suggestion of _get_ (or something else) rather than _find_; the point being that this function isn't really finding anything.
To me it gets something as much as it finds something. I wanted to avoid the word "get" because it seems overly generic, but it's true that "find" is not that much more specific. Ok.
This and copy_propagation_transform_block() are a bit of a reimplementation of transform_ir(). It should be possible to extend transform_ir() to handle this new pass instead.
I'd introduce a struct like
struct transform_pass { bool (*initialize)(struct hlsl_ctx *hlsl_ctx, void **ctx); bool (*process_instruction)(void *ctx, struct hlsl_ir_node *, void *); void (*destroy)(void *ctx); };
and update transform_ir() and the existing passes to make use of it. I imagine we could have some generic initialize() implementation simply doing *ctx = hlsl_ctx; and an empty destroy() implementation to be used when there is no particular context to carry around (like in the existing passes).
I agree, and that happened in my initial submission, with a slightly different interface. For each conditional block the callback must be called thrice (before the block, at the else and after the block) and for each loop block it must be called twice. It seemed to be a reasonable framework which might be useful for other passes in the future.
Zeb asked me to remove it until it is clear that there are other passes that are going to use it, so I custom coded everything.
Since it seems that in the meantime Zeb already managed to convince you to leave it like this, I won't touch it.
Giovanni.
On Fri, Nov 19, 2021 at 4:55 PM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 18/11/21 18:28, Matteo Bruni wrote:
+struct copy_propagation_value +{
- struct hlsl_ir_node *node;
- unsigned int component;
+};
+struct copy_propagation_variable +{
- struct rb_entry entry;
- struct hlsl_ir_var *var;
- struct copy_propagation_value *values;
+};
I still haven't gotten warm to these names. What about copy_propagation_definition (or just copy_propagation_def)?
I don't understand why it's appropriate for this structure, but fine.
It's in the sense of "definition of the current value", more or less like the "def" in use-def chains.
+static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
unsigned int offset, unsigned int count, unsigned int *swizzle)
I think I prefer my original suggestion of _get_ (or something else) rather than _find_; the point being that this function isn't really finding anything.
To me it gets something as much as it finds something. I wanted to avoid the word "get" because it seems overly generic, but it's true that "find" is not that much more specific. Ok.
Throwing a couple more options: generate, compute.
On 11/19/21 10:12 AM, Matteo Bruni wrote:
On Fri, Nov 19, 2021 at 4:55 PM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 18/11/21 18:28, Matteo Bruni wrote:
+struct copy_propagation_value +{
- struct hlsl_ir_node *node;
- unsigned int component;
+};
+struct copy_propagation_variable +{
- struct rb_entry entry;
- struct hlsl_ir_var *var;
- struct copy_propagation_value *values;
+};
I still haven't gotten warm to these names. What about copy_propagation_definition (or just copy_propagation_def)?
I don't understand why it's appropriate for this structure, but fine.
It's in the sense of "definition of the current value", more or less like the "def" in use-def chains.
I don't really like this, though, because both copy_propagation_variable and copy_propagation_value are "def"s, but we need some way of disambiguating what's being defined.
+static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
unsigned int offset, unsigned int count, unsigned int *swizzle)
I think I prefer my original suggestion of _get_ (or something else) rather than _find_; the point being that this function isn't really finding anything.
To me it gets something as much as it finds something. I wanted to avoid the word "get" because it seems overly generic, but it's true that "find" is not that much more specific. Ok.
Throwing a couple more options: generate, compute.
I'm... not sure why this function isn't finding something? It's not really generating anything (except a swizzle, I guess); rather it's looking up the stored node that can serve as a replacement.
Although I like "get" too.
On Fri, Nov 19, 2021 at 6:36 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 11/19/21 10:12 AM, Matteo Bruni wrote:
On Fri, Nov 19, 2021 at 4:55 PM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 18/11/21 18:28, Matteo Bruni wrote:
+struct copy_propagation_value +{
- struct hlsl_ir_node *node;
- unsigned int component;
+};
+struct copy_propagation_variable +{
- struct rb_entry entry;
- struct hlsl_ir_var *var;
- struct copy_propagation_value *values;
+};
I still haven't gotten warm to these names. What about copy_propagation_definition (or just copy_propagation_def)?
I don't understand why it's appropriate for this structure, but fine.
It's in the sense of "definition of the current value", more or less like the "def" in use-def chains.
I don't really like this, though, because both copy_propagation_variable and copy_propagation_value are "def"s, but we need some way of disambiguating what's being defined.
Okay, but I don't see how copy_propagation_variable is any better.
I'm fine with whatever, FWIW.
+static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
unsigned int offset, unsigned int count, unsigned int *swizzle)
I think I prefer my original suggestion of _get_ (or something else) rather than _find_; the point being that this function isn't really finding anything.
To me it gets something as much as it finds something. I wanted to avoid the word "get" because it seems overly generic, but it's true that "find" is not that much more specific. Ok.
Throwing a couple more options: generate, compute.
I'm... not sure why this function isn't finding something? It's not really generating anything (except a swizzle, I guess); rather it's looking up the stored node that can serve as a replacement.
Although I like "get" too.
Same.
On 11/19/21 11:39 AM, Matteo Bruni wrote:
On Fri, Nov 19, 2021 at 6:36 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 11/19/21 10:12 AM, Matteo Bruni wrote:
On Fri, Nov 19, 2021 at 4:55 PM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 18/11/21 18:28, Matteo Bruni wrote:
+struct copy_propagation_value +{
- struct hlsl_ir_node *node;
- unsigned int component;
+};
+struct copy_propagation_variable +{
- struct rb_entry entry;
- struct hlsl_ir_var *var;
- struct copy_propagation_value *values;
+};
I still haven't gotten warm to these names. What about copy_propagation_definition (or just copy_propagation_def)?
I don't understand why it's appropriate for this structure, but fine.
It's in the sense of "definition of the current value", more or less like the "def" in use-def chains.
I don't really like this, though, because both copy_propagation_variable and copy_propagation_value are "def"s, but we need some way of disambiguating what's being defined.
Okay, but I don't see how copy_propagation_variable is any better.
I'm fine with whatever, FWIW.
I suppose "copy_propagation_variable_def" is the correct solution :-)
Though it does get long. I would have abbreviated to "copy_prop_*" in the first place (and then "copy_prop_var_def" I guess.)
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
---
v7: Address comments in https://source.winehq.org/patches/data/219733

 libs/vkd3d-shader/hlsl_codegen.c | 148 +++++++++++++++++++++++++++++--
 1 file changed, 140 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 42422220..86d82965 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -255,7 +255,18 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
  * updated. When scanning through a load, it is checked if all the
  * registers involved in the load come from a single node. In such
  * case, the store can be replaced with a swizzle based on that
- * node. */
+ * node.
+ *
+ * In order to address control flow properly, instead of a single
+ * variables map we keep a stack of variables maps, pushing a new
+ * entry each time we enter a block, and popping the entry when
+ * leaving the block.
+ *
+ * When entering a conditional block, both branches ("then" and
+ * "else") can inherit the variable state available just before the
+ * conditional block. After the conditional block, all variables that
+ * might have been written in either branch must be invalidated,
+ * because we don't know which branch has executed. */

 struct copy_propagation_value
 {
@@ -272,7 +283,9 @@ struct copy_propagation_variable

 struct copy_propagation_state
 {
-    struct rb_tree variables;
+    struct rb_tree *variables;
+    size_t depth;
+    size_t capacity;
 };

 static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry)
@@ -293,7 +306,7 @@ static void copy_propagation_variable_destroy(struct rb_entry *entry, void *cont
 static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state,
         struct hlsl_ir_var *var)
 {
-    struct rb_entry *entry = rb_get(&state->variables, var);
+    struct rb_entry *entry = rb_get(&state->variables[state->depth], var);

     if (entry)
         return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
@@ -304,7 +317,7 @@ static struct copy_propagation_variable *copy_propagation_get_variable(struct co
 static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx,
         struct copy_propagation_state *state, struct hlsl_ir_var *var)
 {
-    struct rb_entry *entry = rb_get(&state->variables, var);
+    struct rb_entry *entry = rb_get(&state->variables[state->depth], var);
     struct copy_propagation_variable *variable;
     int res;

@@ -323,7 +336,7 @@ static struct copy_propagation_variable *copy_propagation_create_variable(struct
         return NULL;
     }

-    res = rb_put(&state->variables, var, &variable->entry);
+    res = rb_put(&state->variables[state->depth], var, &variable->entry);
     assert(!res);

     return variable;
@@ -354,6 +367,90 @@ static void copy_propagation_set_value(struct copy_propagation_variabl
     }
 }

+static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
+        struct hlsl_block *block)
+{
+    struct hlsl_ir_node *instr;
+
+    LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
+    {
+        switch (instr->type)
+        {
+            case HLSL_IR_STORE:
+            {
+                struct hlsl_ir_store *store = hlsl_ir_store(instr);
+                struct copy_propagation_variable *variable;
+                struct hlsl_deref *lhs = &store->lhs;
+                struct hlsl_ir_var *var = lhs->var;
+                unsigned int offset;
+
+                variable = copy_propagation_get_variable(state, var);
+                if (!variable)
+                    continue;
+
+                if (hlsl_offset_from_deref(lhs, &offset))
+                    copy_propagation_set_value(variable, offset, store->writemask, NULL);
+                else
+                    copy_propagation_invalidate_whole_variable(variable);
+
+                break;
+            }
+
+            case HLSL_IR_IF:
+            {
+                struct hlsl_ir_if *iff = hlsl_ir_if(instr);
+
+                copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs);
+                copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs);
+
+                break;
+            }
+
+            case HLSL_IR_LOOP:
+            {
+                struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
+
+                copy_propagation_invalidate_from_block(ctx, state, &loop->body);
+
+                break;
+            }
+
+            default:
+                break;
+        }
+    }
+}
+
+static void copy_propagation_pop(struct copy_propagation_state *state)
+{
+    assert(state->depth > 0);
+    rb_destroy(&state->variables[state->depth], copy_propagation_variable_destroy, NULL);
+    --state->depth;
+}
+
+static bool copy_propagation_duplicate(struct hlsl_ctx *ctx, struct copy_propagation_state *state)
+{
+    struct copy_propagation_variable *var;
+
+    if (!hlsl_array_reserve(ctx, (void**)&state->variables, &state->capacity, state->depth + 2, sizeof(*state->variables)))
+        return false;
+    ++state->depth;
+
+    rb_init(&state->variables[state->depth], copy_propagation_variable_compare);
+
+    RB_FOR_EACH_ENTRY(var, &state->variables[state->depth - 1], struct copy_propagation_variable, entry)
+    {
+        struct copy_propagation_variable *new_var = copy_propagation_create_variable(ctx, state, var->var);
+
+        if (!new_var)
+            return false;
+
+        memcpy(new_var->values, var->values, sizeof(*var->values) * var->var->data_type->reg_size);
+    }
+
+    return true;
+}
+
 static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
         unsigned int offset, unsigned int count, unsigned int *swizzle)
 {
@@ -436,6 +533,34 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s
         copy_propagation_invalidate_whole_variable(variable);
 }

+static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
+        struct copy_propagation_state *state);
+
+static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff,
+        struct copy_propagation_state *state)
+{
+    bool progress = false;
+
+    if (!copy_propagation_duplicate(ctx, state))
+        goto end;
+
+    progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, state);
+
+    copy_propagation_pop(state);
+    if (!copy_propagation_duplicate(ctx, state))
+        goto end;
+
+    progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, state);
+
+    copy_propagation_pop(state);
+
+end:
+    copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs);
+    copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs);
+
+    return progress;
+}
+
 static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
         struct copy_propagation_state *state)
 {
@@ -455,7 +580,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
                 break;

             case HLSL_IR_IF:
-                FIXME("Copy propagation doesn't support conditionals yet, leaving.\n");
+                progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state);
                 return progress;

             case HLSL_IR_LOOP:
@@ -475,11 +600,18 @@ static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bl
     struct copy_propagation_state state;
     bool progress;

-    rb_init(&state.variables, copy_propagation_variable_compare);
+    state.depth = 0;
+    state.capacity = 1;
+    state.variables = hlsl_alloc(ctx, sizeof(*state.variables) * state.capacity);
+    if (!state.variables)
+        return false;
+    rb_init(&state.variables[state.depth], copy_propagation_variable_compare);

     progress = copy_propagation_transform_block(ctx, block, &state);

-    rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL);
+    assert(state.depth == 0);
+    rb_destroy(&state.variables[state.depth], copy_propagation_variable_destroy, NULL);
+    vkd3d_free(state.variables);

     return progress;
 }
On 11/17/21 02:47, Giovanni Mascellani wrote:
@@ -272,7 +283,9 @@ struct copy_propagation_variable
 struct copy_propagation_state
 {
-    struct rb_tree variables;
+    struct rb_tree *variables;
I didn't notice this last time, but since IIUC rbtree entries can hold a pointer to the head of the list (much like linked list entries), this isn't safe. You'll need to individually allocate list heads.
+    size_t depth;
+    size_t capacity;
 };
...
+static bool copy_propagation_duplicate(struct hlsl_ctx *ctx, struct copy_propagation_state *state)
+{
+    struct copy_propagation_variable *var;
+
+    if (!hlsl_array_reserve(ctx, (void**)&state->variables, &state->capacity, state->depth + 2, sizeof(*state->variables)))
"state->depth + 2" looks wrong; is it?
+        return false;
+    ++state->depth;
+
+    rb_init(&state->variables[state->depth], copy_propagation_variable_compare);
+
+    RB_FOR_EACH_ENTRY(var, &state->variables[state->depth - 1], struct copy_propagation_variable, entry)
+    {
+        struct copy_propagation_variable *new_var = copy_propagation_create_variable(ctx, state, var->var);
+
+        if (!new_var)
+            return false;
+
+        memcpy(new_var->values, var->values, sizeof(*var->values) * var->var->data_type->reg_size);
+    }
+
+    return true;
+}
On 11/17/21 22:09, Zebediah Figura (she/her) wrote:
+static bool copy_propagation_duplicate(struct hlsl_ctx *ctx, struct copy_propagation_state *state) +{ + struct copy_propagation_variable *var;
+ if (!hlsl_array_reserve(ctx, (void**)&state->variables, &state->capacity, state->depth + 2, sizeof(*state->variables)))
"state->depth + 2" looks wrong; is it?
Wait, never mind, I see why this looks wrong. The fact that "depth" is 1-biased seems confusing to me, though...
+ return false; + ++state->depth;
+ rb_init(&state->variables[state->depth], copy_propagation_variable_compare);
+ RB_FOR_EACH_ENTRY(var, &state->variables[state->depth - 1], struct copy_propagation_variable, entry) + { + struct copy_propagation_variable *new_var = copy_propagation_create_variable(ctx, state, var->var);
+ if (!new_var) + return false;
+ memcpy(new_var->values, var->values, sizeof(*var->values) * var->var->data_type->reg_size); + }
+ return true; +}
Hi,
On 18/11/21 05:18, Zebediah Figura (she/her) wrote:
+ if (!hlsl_array_reserve(ctx, (void**)&state->variables, &state->capacity, state->depth + 2, sizeof(*state->variables)))
"state->depth + 2" looks wrong; is it?
Wait, never mind, I see why this looks wrong. The fact that "depth" is 1-biased seems confusing to me, though...
I agree it is a bit strange, but I think it's correct too.
As to whether to store the recursion depth or the number of stack levels (which is depth + 1), I think it's a matter of taste. If you want "+ 1" here, you'll have a lot of "- 1" in other places (mostly _get_variable and _create_variable). I like depth more (mostly because I like to initialize things to zero), but I am flexible here.
Giovanni.
On Thu, Nov 18, 2021 at 10:01 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 18/11/21 05:18, Zebediah Figura (she/her) wrote:
+    if (!hlsl_array_reserve(ctx, (void**)&state->variables, &state->capacity, state->depth + 2, sizeof(*state->variables)))
"state->depth + 2" looks wrong; is it?
Wait, never mind, I see why this looks wrong. The fact that "depth" is 1-biased seems confusing to me, though...
I agree it is a bit strange, but I think it's correct too.
As to whether to store the recursion depth or the number of stack levels (which is depth + 1), I think it's a matter of taste. If you want "+ 1" here, you'll have a lot of "- 1" in other places (mostly _get_variable and _create_variable).
True, although one could argue that having a bunch of "- 1" in those helper functions is proper enough.
In this particular case though, when I see:
struct rb_entry *entry = rb_get(&state->variables[state->depth], var);
I immediately think that the array access is missing a "- 1", as that is the usual (for d3d code at least) pattern of accessing the last element of an array with "count" elements.
On 12/1/21 10:12, Matteo Bruni wrote:
On Thu, Nov 18, 2021 at 10:01 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 18/11/21 05:18, Zebediah Figura (she/her) wrote:
- if (!hlsl_array_reserve(ctx, (void**)&state->variables,
&state->capacity, state->depth + 2, sizeof(*state->variables)))
"state->depth + 2" looks wrong; is it?
Wait, never mind, I see why this looks wrong. The fact that "depth" is 1-biased seems confusing to me, though...
I agree it is a bit strange, but I think it's correct too.
As whether to store the recursion depth or the number of stack levels (which is depth + 1), I think it's a matter of tastes. If you want "+ 1" here, you'll have a lot of "- 1" in other places (mostly _get_variable and _create_variable).
True, although one could argue that having a bunch of "- 1" in those helper functions is proper enough.
In this particular case though, when I see:
struct rb_entry *entry = rb_get(&state->variables[state->depth], var);
I immediately think that the array access is missing a "- 1", as that is the usual (for d3d code at least) pattern of accessing the last element of an array with "count" elements.
Yeah, I concur. Accessing the "last" element via "[count - 1]" is idiomatic enough that omitting it looks wrong.
On 01/12/21 17:17, Zebediah Figura wrote:
Yeah, I concur. Accessing the "last" element via "[count - 1]" is idiomatic enough that omitting it looks wrong.
FTR, I'd agree too if the variable was named "count".
Giovanni.
Hi,
On 18/11/21 05:09, Zebediah Figura (she/her) wrote:
I didn't notice this last time, but since IIUC rbtree entries can hold a pointer to the head of the list (much like linked list entries), this isn't safe. You'll need to individually allocate list heads.
It doesn't look so:
struct rb_entry
{
    struct rb_entry *parent;
    struct rb_entry *left;
    struct rb_entry *right;
    unsigned int flags;
};

struct rb_tree
{
    rb_compare_func compare;
    struct rb_entry *root;
};
The entries have pointers between themselves, but none of them points to the tree itself. Just the other way around. In other words, struct rb_tree already provides the level of indirection that you rightly say it is needed.
Giovanni.
On 11/18/21 2:58 AM, Giovanni Mascellani wrote:
Hi,
On 18/11/21 05:09, Zebediah Figura (she/her) wrote:
I didn't notice this last time, but since IIUC rbtree entries can hold a pointer to the head of the list (much like linked list entries), this isn't safe. You'll need to individually allocate list heads.
It doesn't look so:
struct rb_entry
{
    struct rb_entry *parent;
    struct rb_entry *left;
    struct rb_entry *right;
    unsigned int flags;
};

struct rb_tree
{
    rb_compare_func compare;
    struct rb_entry *root;
};
The entries have pointers between themselves, but none of them points to the tree itself. Just the other way around. In other words, struct rb_tree already provides the level of indirection that you rightly say it is needed.
Eh, never mind, you're right, I thought the top-level entries had the tree as a parent, but they don't.
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
---
 libs/vkd3d-shader/hlsl_codegen.c | 28 ++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 86d82965..1178134d 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -266,7 +266,15 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
  * "else") can inherit the variable state available just before the
  * conditional block. After the conditional block, all variables that
  * might have been written in either branch must be invalidated,
- * because we don't know which branch has executed. */
+ * because we don't know which branch has executed.
+ *
+ * When entering a loop block, we can inherit the variable state
+ * available just before the loop block, except that all variables
+ * that are written in the body must be invalidated (because at the
+ * beginning of each execution of the body we don't know whether the
+ * body has already executed or not). The same is valid after the loop
+ * block, because we don't know whether the body has executed at all
+ * or not. */

 struct copy_propagation_value
 {
@@ -561,6 +569,22 @@ end:
     return progress;
 }

+static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop,
+        struct copy_propagation_state *state)
+{
+    bool progress = false;
+
+    copy_propagation_invalidate_from_block(ctx, state, &loop->body);
+    if (!copy_propagation_duplicate(ctx, state))
+        return progress;
+
+    progress |= copy_propagation_transform_block(ctx, &loop->body, state);
+
+    copy_propagation_pop(state);
+
+    return progress;
+}
+
 static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
         struct copy_propagation_state *state)
 {
@@ -584,7 +608,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
                 return progress;

             case HLSL_IR_LOOP:
-                FIXME("Copy propagation doesn't support loops yet, leaving.\n");
+                progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state);
                 return progress;

             default: