Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 24b8205c..f8b977b0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -475,6 +475,27 @@ static bool fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi return true; }
+static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_swizzle *swizzle; + unsigned int i; + + if (instr->type != HLSL_IR_SWIZZLE) + return false; + swizzle = hlsl_ir_swizzle(instr); + + if (instr->data_type->dimx != swizzle->val.node->data_type->dimx) + return false; + + for (i = 0; i < instr->data_type->dimx; ++i) + if (((swizzle->swizzle >> (2 * i)) & 3) != i) + return false; + + replace_node(instr, swizzle->val.node); + + return true; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -1355,6 +1376,7 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun } while (progress); while (transform_ir(ctx, fold_constants, body, NULL)); + transform_ir(ctx, remove_trivial_swizzles, body, NULL);
if (ctx->profile->major_version < 4) transform_ir(ctx, lower_division, body, NULL);
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index f8b977b0..f5432d22 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -204,7 +204,7 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx struct hlsl_block *block, void *context) { struct hlsl_ir_node *instr, *next; - bool progress = 0; + bool progress = false;
LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) {
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl.h | 6 ++++-- libs/vkd3d-shader/hlsl_codegen.c | 33 +++++++++++++++++++++++--------- libs/vkd3d-shader/hlsl_sm1.c | 4 ++-- libs/vkd3d-shader/hlsl_sm4.c | 8 ++++---- 4 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index eae96232..f2a7a48d 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -751,8 +751,10 @@ unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); unsigned int hlsl_swizzle_from_writemask(unsigned int writemask);
-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref); -struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type); +bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset); +unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); +struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct hlsl_type *type);
bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index f5432d22..a0154e3b 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1286,31 +1286,46 @@ static bool type_is_single_reg(const struct hlsl_type *type) return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; }
-unsigned int hlsl_offset_from_deref(const struct hlsl_deref *deref) +bool hlsl_offset_from_deref(const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node;
if (!offset_node) - return 0; + { + *offset = 0; + return true; + }
/* We should always have generated a cast to UINT. */ assert(offset_node->data_type->type == HLSL_CLASS_SCALAR && offset_node->data_type->base_type == HLSL_TYPE_UINT);
if (offset_node->type != HLSL_IR_CONSTANT) - { - FIXME("Dereference with non-constant offset of type %s.\n", hlsl_node_type_to_string(offset_node->type)); - return 0; - } + return false;
- return hlsl_ir_constant(offset_node)->value[0].u; + *offset = hlsl_ir_constant(offset_node)->value[0].u; + return true; +} + +unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + unsigned int offset; + + if (hlsl_offset_from_deref(deref, &offset)) + return offset; + + hlsl_fixme(ctx, deref->offset.node->loc, "Dereference with non-constant offset of type %s.", + hlsl_node_type_to_string(deref->offset.node->type)); + + return 0; }
-struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type) +struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct hlsl_type *type) { const struct hlsl_ir_var *var = deref->var; struct hlsl_reg ret = var->reg; - unsigned int offset = hlsl_offset_from_deref(deref); + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
ret.id += offset / 4;
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 875f521f..4ff552bc 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -663,7 +663,7 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); - const struct hlsl_reg reg = hlsl_reg_from_deref(&load->src, instr->data_type); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src, instr->data_type); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, @@ -707,7 +707,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * { const struct hlsl_ir_store *store = hlsl_ir_store(instr); const struct hlsl_ir_node *rhs = store->rhs.node; - const struct hlsl_reg reg = hlsl_reg_from_deref(&store->lhs, rhs->data_type); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs, rhs->data_type); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index e597425a..12ddd4fd 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -792,7 +792,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - unsigned int offset = hlsl_offset_from_deref(deref) + var->buffer_offset; + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
assert(data_type->type <= HLSL_CLASS_VECTOR); reg->type = VKD3D_SM4_RT_CONSTBUFFER; @@ -820,7 +820,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_INPUT; @@ -850,7 +850,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_OUTPUT; @@ -862,7 +862,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r } else { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type); + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type);
assert(hlsl_reg.allocated); reg->type = VKD3D_SM4_RT_TEMP;
On 11/11/21 05:06, Giovanni Mascellani wrote:
+unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{
- unsigned int offset;
- if (hlsl_offset_from_deref(deref, &offset))
return offset;
- hlsl_fixme(ctx, deref->offset.node->loc, "Dereference with non-constant offset of type %s.",
hlsl_node_type_to_string(deref->offset.node->type));
- return 0; }
I don't hate this, but ultimately we're going to need to handle non-constant offsets in the callers anyway. I'd just pass the buck all the way down to sm4_register_from_deref() and write_sm1_{load,store}().
Hi,
On 12/11/21 05:16, Zebediah Figura (she/her) wrote:
On 11/11/21 05:06, Giovanni Mascellani wrote:
+unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + unsigned int offset;
+ if (hlsl_offset_from_deref(deref, &offset)) + return offset;
+ hlsl_fixme(ctx, deref->offset.node->loc, "Dereference with non-constant offset of type %s.", + hlsl_node_type_to_string(deref->offset.node->type));
+ return 0; }
I don't hate this, but ultimately we're going to need to handle non-constant offsets in the callers anyway. I'd just pass the buck all the way down to sm4_register_from_deref() and write_sm1_{load,store}().
Sure, but wouldn't it make sense to do this once the caller is able to handle the error condition? Otherwise we have to repeat the same code in five different places. Just think of hlsl_offset_from_deref_safe as a helper, nothing more. Once we have another helper that does a better job, hlsl_offset_from_deref_safe will be kissed bye bye.
But if you really insist, I can copy it in every caller.
Giovanni.
On 11/12/21 04:29, Giovanni Mascellani wrote:
Hi,
On 12/11/21 05:16, Zebediah Figura (she/her) wrote:
On 11/11/21 05:06, Giovanni Mascellani wrote:
+unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + unsigned int offset;
+ if (hlsl_offset_from_deref(deref, &offset)) + return offset;
+ hlsl_fixme(ctx, deref->offset.node->loc, "Dereference with non-constant offset of type %s.", + hlsl_node_type_to_string(deref->offset.node->type));
+ return 0; }
I don't hate this, but ultimately we're going to need to handle non-constant offsets in the callers anyway. I'd just pass the buck all the way down to sm4_register_from_deref() and write_sm1_{load,store}().
Sure, but wouldn't it make sense to do this once the caller is able to handle the error condition? Otherwise we have to repeat the same code in five different places. Just think of hlsl_offset_from_deref_safe as a helper, nothing more. Once we have another helper that does a better job, hlsl_offset_from_deref_safe will be kissed bye bye.
But if you really insist, I can copy it in every caller.
Well, like I said, I don't hate it. On the other hand, since the duplicated code amounts to a single hlsl_fixme line, I don't really hate that either. So, <shrug>.
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 252 ++++++++++++++++++++++++++++++- 1 file changed, 251 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index a0154e3b..7a104b45 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -237,6 +237,251 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) hlsl_free_instr(old); }
+/* The copy propagation pass scans the code trying to reconstruct, for + * each load, which store last wrote to that + * variable. When this happens, the load can be replaced with the node + * from which the variable was stored. + * + * In order to do that, the pass keeps a map of all the variables it + * has already seen; for each variable, a pointer to a node and a + * component index is kept for each of the registers that belong to + * the variable. This means that, at that point of the scan, that + * register was last stored to from that component of that node. The + * pointer can be NULL if information for that register could not be + * gathered statically (either because a non-constant offset is used, + * or because control flow forces us to drop information). + * + * When scanning through a store, data for the stored-to variable is + * updated. When scanning through a load, it is checked if all the + * registers involved in the load come from a single node. In such + * case, the load can be replaced with a swizzle based on that + * node. 
*/ + +struct copy_propagation_value +{ + struct hlsl_ir_node *node; + unsigned int component; +}; + +struct copy_propagation_variable +{ + struct rb_entry entry; + struct hlsl_ir_var *var; + struct copy_propagation_value *values; +}; + +struct copy_propagation_state +{ + struct rb_tree variables; +}; + +static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) +{ + struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)variable->var; + + if (key_int < entry_int) + return -1; + else if (key_int > entry_int) + return 1; + else + return 0; +} + +static void copy_propagation_variable_destroy(struct rb_entry *entry, void *context) +{ + struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + + vkd3d_free(variable); +} + +static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state, + struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->variables, var); + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + else + return NULL; +} + +static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx, + struct copy_propagation_state *state, struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->variables, var); + struct copy_propagation_variable *variable; + int res; + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + + variable = hlsl_alloc(ctx, sizeof(*variable)); + if (!variable) + return NULL; + + variable->var = var; + variable->values = hlsl_alloc(ctx, sizeof(*variable->values) * var->data_type->reg_size); + if (!variable->values) + { + vkd3d_free(variable); + return NULL; + } + + res = rb_put(&state->variables, var, &variable->entry); + assert(!res); + + return variable; +} + +static void 
copy_propagation_invalidate_whole_variable(struct copy_propagation_variable *variable) +{ + TRACE("invalidate variable %s\n", variable->var->name); + memset(variable->values, 0, sizeof(*variable->values) * variable->var->data_type->reg_size); +} + +static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset, + unsigned char writemask, struct hlsl_ir_node *node) +{ + unsigned int i; + + for (i = 0; i < 4; ++i) + { + if (writemask & (1u << i)) + { + TRACE("variable %s[%d] is written by %p[%d]\n", variable->var->name, offset + i, node, i); + variable->values[offset + i].node = node; + variable->values[offset + i].component = i; + } + } +} + +static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable, + unsigned int offset, unsigned int count, unsigned int *swizzle) +{ + struct hlsl_ir_node *node = NULL; + unsigned int i; + + assert(offset + count <= variable->var->data_type->reg_size); + + *swizzle = 0; + + for (i = 0; i < count; ++i) + { + if (!node) + node = variable->values[offset + i].node; + else if (node != variable->values[offset + i].node) + return NULL; + *swizzle |= variable->values[offset + i].component << (2 * i); + } + + return node; +} + +static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, + struct copy_propagation_state *state) +{ + struct hlsl_ir_node *node = &load->node, *new_node; + struct copy_propagation_variable *variable; + struct hlsl_type *type = node->data_type; + unsigned int offset, swizzle; + struct hlsl_deref *src = &load->src; + struct hlsl_ir_var *var = src->var; + struct hlsl_ir_swizzle *swizzle_node; + + if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + return false; + + if (!hlsl_offset_from_deref(src, &offset)) + return false; + + variable = copy_propagation_get_variable(state, var); + if (!variable) + return false; + + new_node = copy_propagation_find_replacement(variable, offset, 
type->dimx, &swizzle); + + TRACE("load from %s[%d-%d] reconstructed to %p[%u %u %u %u]\n", var->name, offset, offset + type->dimx, + new_node, swizzle % 4, (swizzle >> 2) % 4, (swizzle >> 4) % 4, (swizzle >> 6) % 4); + + if (!new_node) + return false; + + if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc))) + return false; + list_add_before(&node->entry, &swizzle_node->node.entry); + + replace_node(node, &swizzle_node->node); + + return true; +} + +static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + struct copy_propagation_state *state) +{ + struct copy_propagation_variable *variable; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + variable = copy_propagation_create_variable(ctx, state, var); + if (!variable) + return; + + if (hlsl_offset_from_deref(lhs, &offset)) + copy_propagation_set_value(variable, offset, store->writemask, store->rhs.node); + else + copy_propagation_invalidate_whole_variable(variable); +} + +static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state) +{ + struct hlsl_ir_node *instr, *next; + bool progress = false; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOAD: + progress |= copy_propagation_analyze_load(ctx, hlsl_ir_load(instr), state); + break; + + case HLSL_IR_STORE: + copy_propagation_record_store(ctx, hlsl_ir_store(instr), state); + break; + + case HLSL_IR_IF: + FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); + return progress; + + case HLSL_IR_LOOP: + FIXME("Copy propagation doesn't support loops yet, leaving.\n"); + return progress; + + default: + break; + } + } + + return progress; +} + +static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + struct copy_propagation_state state; + bool 
progress; + + rb_init(&state.variables, copy_propagation_variable_compare); + + progress = copy_propagation_transform_block(ctx, block, &state); + + rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL); + + return progress; +} + static bool is_vec1(const struct hlsl_type *type) { return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); @@ -1390,7 +1635,12 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun progress |= transform_ir(ctx, split_struct_copies, body, NULL); } while (progress); - while (transform_ir(ctx, fold_constants, body, NULL)); + do + { + progress = transform_ir(ctx, fold_constants, body, NULL); + progress |= copy_propagation_execute(ctx, body); + } + while (progress); transform_ir(ctx, remove_trivial_swizzles, body, NULL);
if (ctx->profile->major_version < 4)
Looking really good, at least from visual examination; the only thing I have left for the moment is nitpicking the trace formats...
On 11/11/21 05:06, Giovanni Mascellani wrote:
+static void copy_propagation_invalidate_whole_variable(struct copy_propagation_variable *variable) +{
- TRACE("invalidate variable %s\n", variable->var->name);
Nitpick, but can you please make this trace consistent with vkd3d style?
- memset(variable->values, 0, sizeof(*variable->values) * variable->var->data_type->reg_size);
+}
+static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset,
unsigned char writemask, struct hlsl_ir_node *node)
+{
- unsigned int i;
- for (i = 0; i < 4; ++i)
- {
if (writemask & (1u << i))
{
TRACE("variable %s[%d] is written by %p[%d]\n", variable->var->name, offset + i, node, i);
And this one.
variable->values[offset + i].node = node;
variable->values[offset + i].component = i;
}
- }
+}
+static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
unsigned int offset, unsigned int count, unsigned int *swizzle)
+{
- struct hlsl_ir_node *node = NULL;
- unsigned int i;
- assert(offset + count <= variable->var->data_type->reg_size);
- *swizzle = 0;
- for (i = 0; i < count; ++i)
- {
if (!node)
node = variable->values[offset + i].node;
else if (node != variable->values[offset + i].node)
return NULL;
*swizzle |= variable->values[offset + i].component << (2 * i);
- }
- return node;
+}
+static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load,
struct copy_propagation_state *state)
+{
- struct hlsl_ir_node *node = &load->node, *new_node;
- struct copy_propagation_variable *variable;
- struct hlsl_type *type = node->data_type;
- unsigned int offset, swizzle;
- struct hlsl_deref *src = &load->src;
- struct hlsl_ir_var *var = src->var;
- struct hlsl_ir_swizzle *swizzle_node;
- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR)
return false;
- if (!hlsl_offset_from_deref(src, &offset))
return false;
- variable = copy_propagation_get_variable(state, var);
- if (!variable)
return false;
- new_node = copy_propagation_find_replacement(variable, offset, type->dimx, &swizzle);
- TRACE("load from %s[%d-%d] reconstructed to %p[%u %u %u %u]\n", var->name, offset, offset + type->dimx,
new_node, swizzle % 4, (swizzle >> 2) % 4, (swizzle >> 4) % 4, (swizzle >> 6) % 4);
And this one. Also, do we want to dump the latter part as a swizzle, à la dump_ir_swizzle(), and the former with debug_hlsl_writemask()? I'd think so, although then again I haven't made use of the trace.
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 157 +++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 7a104b45..fec6b87d 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -255,7 +255,18 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) * updated. When scanning through a load, it is checked if all the * registers involved in the load come from a single node. In such * case, the load can be replaced with a swizzle based on that - * node. */ + * node. + * + * All of the above works when we disregard control flow. With control + * flow it becomes slightly more complicated: instead of a single map + * we keep a stack of them, pushing a new entry each time we enter an + * embedded block, and popping the entry when leaving the block. + * + * When entering a conditional block, both branches ("then" and + * "else") can inherit the variable state available just before the + * conditional block. After the conditional block, all variables that + * might have been written in either branch must be invalidated, + * because we don't know which branch has executed. */
struct copy_propagation_value { @@ -272,7 +283,9 @@ struct copy_propagation_variable
struct copy_propagation_state { - struct rb_tree variables; + struct rb_tree *variables; + unsigned int depth; + unsigned int capacity; };
static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) @@ -298,7 +311,7 @@ static void copy_propagation_variable_destroy(struct rb_entry *entry, void *cont static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state, struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->variables, var); + struct rb_entry *entry = rb_get(&state->variables[state->depth], var);
if (entry) return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); @@ -309,7 +322,7 @@ static struct copy_propagation_variable *copy_propagation_get_variable(struct co static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->variables, var); + struct rb_entry *entry = rb_get(&state->variables[state->depth], var); struct copy_propagation_variable *variable; int res;
@@ -328,7 +341,7 @@ static struct copy_propagation_variable *copy_propagation_create_variable(struct return NULL; }
- res = rb_put(&state->variables, var, &variable->entry); + res = rb_put(&state->variables[state->depth], var, &variable->entry); assert(!res);
return variable; @@ -356,6 +369,99 @@ static void copy_propagation_set_value(struct copy_propagation_variable *variabl } }
+static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, + struct hlsl_block *block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); + struct copy_propagation_variable *variable; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + variable = copy_propagation_get_variable(state, var); + if (!variable) + continue; + + if (hlsl_offset_from_deref(lhs, &offset)) + copy_propagation_set_value(variable, offset, store->writemask, NULL); + else + copy_propagation_invalidate_whole_variable(variable); + + break; + } + + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + + break; + } + + default: + break; + } + } +} + +static void copy_propagation_pop(struct copy_propagation_state *state) +{ + assert(state->depth > 0); + rb_destroy(&state->variables[state->depth], copy_propagation_variable_destroy, NULL); + --state->depth; +} + +static bool copy_propagation_duplicate(struct hlsl_ctx *ctx, struct copy_propagation_state *state) +{ + struct copy_propagation_variable *var; + + if (state->depth + 1 == state->capacity) + { + unsigned int new_capacity = 2 * state->capacity; + struct rb_tree *new_vars; + + new_vars = hlsl_realloc(ctx, state->variables, sizeof(*state->variables) * new_capacity); + if (!new_vars) + return false; + state->capacity = new_capacity; + state->variables = new_vars; + } + ++state->depth; + + rb_init(&state->variables[state->depth], 
copy_propagation_variable_compare); + + RB_FOR_EACH_ENTRY(var, &state->variables[state->depth - 1], struct copy_propagation_variable, entry) + { + struct copy_propagation_variable *new_var = copy_propagation_create_variable(ctx, state, var->var); + + if (!new_var) + return false; + + memcpy(new_var->values, var->values, sizeof(*var->values) * var->var->data_type->reg_size); + } + + return true; +} + static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable, unsigned int offset, unsigned int count, unsigned int *swizzle) { @@ -434,6 +540,34 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s copy_propagation_invalidate_whole_variable(variable); }
+static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state); + +static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, + struct copy_propagation_state *state) +{ + bool progress = false; + + if (!copy_propagation_duplicate(ctx, state)) + goto end; + + progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, state); + + copy_propagation_pop(state); + if (!copy_propagation_duplicate(ctx, state)) + goto end; + + progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, state); + + copy_propagation_pop(state); + +end: + copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + + return progress; +} + static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct copy_propagation_state *state) { @@ -453,7 +587,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b break;
case HLSL_IR_IF: - FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); + progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state); return progress;
case HLSL_IR_LOOP: @@ -473,11 +607,18 @@ static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bl struct copy_propagation_state state; bool progress;
- rb_init(&state.variables, copy_propagation_variable_compare); + state.depth = 0; + state.capacity = 1; + state.variables = hlsl_alloc(ctx, sizeof(*state.variables) * state.capacity); + if (!state.variables) + return false; + rb_init(&state.variables[state.depth], copy_propagation_variable_compare);
progress = copy_propagation_transform_block(ctx, block, &state);
- rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL); + assert(state.depth == 0); + rb_destroy(&state.variables[state.depth], copy_propagation_variable_destroy, NULL); + vkd3d_free(state.variables);
return progress; }
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index fec6b87d..1b1132a1 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -266,7 +266,15 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) * "else") can inherit the variable state available just before the * conditional block. After the conditional block, all variables that * might have been written in either branch must be invalidated, - * because we don't know which branch has executed. */ + * because we don't know which branch has executed. + * + * When entering a loop block, we can inherit the variable state + * available just before the loop block, except that all variables + * that are written in the body must be invalidated (because at the + * beginning of each execution of the body we don't know whether the + * body has already executed or not). The same is valid after the loop + * block, because we don't know whether the body has executed at all + * or not. */
struct copy_propagation_value { @@ -568,6 +576,22 @@ end: return progress; }
+static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, + struct copy_propagation_state *state) +{ + bool progress = false; + + copy_propagation_invalidate_from_block(ctx, state, &loop->body); + if (!copy_propagation_duplicate(ctx, state)) + return progress; + + progress |= copy_propagation_transform_block(ctx, &loop->body, state); + + copy_propagation_pop(state); + + return progress; +} + static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct copy_propagation_state *state) { @@ -591,7 +615,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b return progress;
case HLSL_IR_LOOP: - FIXME("Copy propagation doesn't support loops yet, leaving.\n"); + progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state); return progress;
default: