Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- Pretty sure this (and the following ones) will require some more tweaking before being accepted! :-)
libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++- 1 file changed, 253 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 0ee8ab55..8db8bfc9 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -237,6 +237,253 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) hlsl_free_instr(old); }
+/* struct copy_propagation_state represents the accumulated knowledge + * of the copy propagation pass while it scans through the code. Field + * "variables" is a tree whose elements have type struct + * copy_propagation_varible, and represent each of the variables the + * pass has already encountered (except those with special semantics, + * which are ignored). For each variable, the array "values" (whose + * length is the register size of the variable) represent which node + * and which index inside that node (i.e., which of the at most four + * entries of a vector) provided that value last time. Field "node" + * can be NULL, meaning that the pass was not able to statically + * determine the node. + */ + +struct copy_propagation_value +{ + struct hlsl_ir_node *node; + unsigned int index; +}; + +struct copy_propagation_variable +{ + struct rb_entry entry; + struct hlsl_ir_var *var; + struct copy_propagation_value *values; +}; + +struct copy_propagation_state +{ + struct rb_tree variables; +}; + +static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry) +{ + struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)variable->var; + + if (key_int < entry_int) + return -1; + else if (key_int > entry_int) + return 1; + else + return 0; +} + +static void copy_propagation_variable_destroy(struct rb_entry *entry, void *context) +{ + struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + + vkd3d_free(variable); +} + +static struct copy_propagation_variable *copy_propagation_get_variable(struct hlsl_ctx *ctx, + struct copy_propagation_state *state, struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->variables, var); + struct copy_propagation_variable *variable; + int res; + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry); + + variable = hlsl_alloc(ctx, sizeof(*variable)); + if (!variable) + return NULL; + + variable->var = var; + variable->values = hlsl_alloc(ctx, sizeof(*variable->values) * var->data_type->reg_size); + if (!variable->values) + { + vkd3d_free(variable); + return NULL; + } + + res = rb_put(&state->variables, var, &variable->entry); + assert(!res); + + return variable; +} + +static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset, + unsigned char writemask, struct hlsl_ir_node *node) +{ + unsigned int index; + + for (index = 0; index < 4; ++index) + { + if (writemask & (1u << index)) + { + if (TRACE_ON()) + { + char buf[32]; + if (!node) + sprintf(buf, "(nil)"); + else if (node->index) + sprintf(buf, "@%u", node->index); + else + sprintf(buf, "%p", node); + TRACE("variable %s[%d] is written by %p[%d]\n", variable->var->name, offset + index, buf, index); + } + variable->values[offset + index].node = node; + variable->values[offset + index].index = index; + } + } +} + +/* Check if locations [offset, offset+count) in variable were all + * written from the same node. If so return the node the corresponding + * indices, otherwise return NULL (and undefined indices). */ +static struct hlsl_ir_node *copy_propagation_reconstruct_node(struct copy_propagation_variable *variable, + unsigned int offset, unsigned int count, unsigned int indices[4]) +{ + struct hlsl_ir_node *node = NULL; + unsigned int i; + + assert(offset + count <= variable->var->data_type->reg_size); + + for (i = 0; i < count; ++i) + { + if (!node) + node = variable->values[offset + i].node; + else if (node != variable->values[offset + i].node) + return NULL; + indices[i] = variable->values[offset + i].index; + } + + return node; +} + +static bool copy_propagation_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, + struct copy_propagation_state *state) +{ + struct hlsl_ir_node *node = &load->node, *new_node; + struct copy_propagation_variable *variable; + struct hlsl_type *type = node->data_type; + unsigned int offset, indices[4] = {}; + struct hlsl_deref *src = &load->src; + struct hlsl_ir_var *var = src->var; + struct hlsl_ir_swizzle *swizzle; + DWORD s; + + if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) + return false; + + if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + return false; + + offset = hlsl_offset_from_deref(src); + + variable = copy_propagation_get_variable(ctx, state, var); + if (!variable) + return false; + + new_node = copy_propagation_reconstruct_node(variable, offset, type->dimx, indices); + + if (TRACE_ON()) + { + char buf[32]; + if (!new_node) + sprintf(buf, "(nil)"); + else if (new_node->index) + sprintf(buf, "@%u", new_node->index); + else + sprintf(buf, "%p", new_node); + TRACE("load from %s[%d-%d] reconstructed to %s[%d %d %d %d]\n", var->name, offset, + offset + type->dimx, buf, indices[0], indices[1], indices[2], indices[3]); + } + + if (!new_node) + return false; + + s = indices[0] | indices[1] << 2 | indices[2] << 4 | indices[3] << 6; + if (!(swizzle = hlsl_new_swizzle(ctx, s, type->dimx, new_node, &node->loc))) + return false; + list_add_before(&node->entry, &swizzle->node.entry); + + replace_node(node, &swizzle->node); + + return true; +} + +static bool copy_propagation_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + struct copy_propagation_state *state) +{ + struct copy_propagation_variable *variable; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + + if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) + return false; + + variable = copy_propagation_get_variable(ctx, state, var); + if (!variable) + return false; + + copy_propagation_set_value(variable, hlsl_offset_from_deref(lhs), store->writemask, store->rhs.node); + + return false; +} + +static bool copy_propagation_recursive(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state) +{ + struct hlsl_ir_node *instr, *next; + bool progress = false; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOAD: + progress |= copy_propagation_load(ctx, hlsl_ir_load(instr), state); + break; + + case HLSL_IR_STORE: + progress |= copy_propagation_store(ctx, hlsl_ir_store(instr), state); + break; + + case HLSL_IR_IF: + FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); + return progress; + + case HLSL_IR_LOOP: + FIXME("Copy propagation doesn't support loops yet, leaving.\n"); + return progress; + + default: + break; + } + } + + return progress; +} + +static bool copy_propagation_pass(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + struct copy_propagation_state state; + bool progress; + + rb_init(&state.variables, copy_propagation_variable_compare); + + progress = copy_propagation_recursive(ctx, block, &state); + + rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL); + + return progress; +} + static bool is_vec1(const struct hlsl_type *type) { return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); @@ -1354,7 +1601,12 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun progress |= transform_ir(ctx, split_struct_copies, body, NULL); } while (progress); - while (transform_ir(ctx, fold_constants, body, NULL)); + do + { + progress = transform_ir(ctx, fold_constants, body, NULL); + progress |= copy_propagation_pass(ctx, body); + } + while (progress);
if (ctx->profile->major_version < 4) transform_ir(ctx, lower_division, body, NULL);