From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/d3dcompiler_43/d3dcompiler_private.h | 2 + dlls/d3dcompiler_43/hlsl.y | 65 ++++++++++++++++++++--- dlls/d3dcompiler_43/utils.c | 2 +- 3 files changed, 62 insertions(+), 7 deletions(-)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index 2a926282128..0dee4129f01 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -658,6 +658,7 @@ struct hlsl_ir_node { struct list entry; enum hlsl_ir_node_type type; + unsigned int index; /* for liveness ranges */ struct hlsl_type *data_type;
struct source_location loc; @@ -734,6 +735,7 @@ struct hlsl_ir_loop struct hlsl_ir_node node; /* loop condition is stored in the body (as "if (!condition) break;") */ struct list *body; + unsigned int next_index; /* liveness index of the end of the loop */ };
enum hlsl_ir_expr_op { diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index 4c3e96caa0d..bf1704b5d55 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -915,6 +915,19 @@ static const struct hlsl_ir_function_decl *get_overloaded_func(struct wine_rb_tr return NULL; }
+static struct hlsl_ir_function_decl *get_func_entry(const char *name) +{ + struct wine_rb_entry *entry; + + if ((entry = wine_rb_get(&hlsl_ctx.functions, name)) + && (entry = WINE_RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry)->overloads.root)) + { + return WINE_RB_ENTRY_VALUE(entry, struct hlsl_ir_function_decl, entry); + } + + return NULL; +} + static struct list *append_unop(struct list *list, struct hlsl_ir_node *node) { list_add_tail(list, &node->entry); @@ -2545,9 +2558,37 @@ static void dump_function(struct wine_rb_entry *entry, void *context) wine_rb_for_each_entry(&func->overloads, dump_function_decl, NULL); }
+/* Allocate a unique, ordered index to each instruction, which will be used for + * computing liveness ranges. */ +static unsigned int index_instructions(struct list *instrs, unsigned int index) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, instrs, struct hlsl_ir_node, entry) + { + instr->index = index++; + + if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = if_from_node(instr); + index = index_instructions(iff->then_instrs, index); + if (iff->else_instrs) + index = index_instructions(iff->else_instrs, index); + } + else if (instr->type == HLSL_IR_LOOP) + { + index = index_instructions(loop_from_node(instr)->body, index); + loop_from_node(instr)->next_index = index; + } + } + + return index; +} + struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD minor, const char *entrypoint, char **messages) { + struct hlsl_ir_function_decl *entry_func; struct hlsl_scope *scope, *next_scope; struct hlsl_type *hlsl_type, *next_type; struct hlsl_ir_var *var, *next_var; @@ -2573,12 +2614,6 @@ struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD mino
hlsl_parse();
- if (TRACE_ON(hlsl_parser)) - { - TRACE("IR dump.\n"); - wine_rb_for_each_entry(&hlsl_ctx.functions, dump_function, NULL); - } - TRACE("Compilation status = %d\n", hlsl_ctx.status); if (messages) { @@ -2597,6 +2632,24 @@ struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD mino d3dcompiler_free((void *)hlsl_ctx.source_files[i]); d3dcompiler_free(hlsl_ctx.source_files);
+ if (hlsl_ctx.status == PARSE_ERR) + goto out; + + if (!(entry_func = get_func_entry(entrypoint))) + { + hlsl_message("error: entry point %s is not defined\n", debugstr_a(entrypoint)); + goto out; + } + + index_instructions(entry_func->body, 1); + + if (TRACE_ON(hlsl_parser)) + { + TRACE("IR dump.\n"); + wine_rb_for_each_entry(&hlsl_ctx.functions, dump_function, NULL); + } + +out: TRACE("Freeing functions IR.\n"); wine_rb_destroy(&hlsl_ctx.functions, free_function_rb, NULL);
diff --git a/dlls/d3dcompiler_43/utils.c b/dlls/d3dcompiler_43/utils.c index 6e9bb593e00..84323c334f8 100644 --- a/dlls/d3dcompiler_43/utils.c +++ b/dlls/d3dcompiler_43/utils.c @@ -2156,7 +2156,7 @@ static void debug_dump_ir_loop(const struct hlsl_ir_loop *loop)
static void debug_dump_instr(const struct hlsl_ir_node *instr) { - wine_dbg_printf("%p: ", instr); + wine_dbg_printf("%4u: %p: ", instr->index, instr); switch (instr->type) { case HLSL_IR_EXPR:
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/d3dcompiler_43/d3dcompiler_private.h | 2 +- dlls/d3dcompiler_43/hlsl.y | 90 ++++++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index 0dee4129f01..e1cdd9a8ba0 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -700,7 +700,7 @@ struct hlsl_ir_var const struct reg_reservation *reg_reservation; struct list scope_entry, param_entry;
- struct hlsl_var_allocation *allocation; + unsigned int first_write, last_read; };
struct hlsl_ir_function diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index bf1704b5d55..bdf619a9224 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -21,6 +21,7 @@ %{ #include "wine/debug.h"
+#include <limits.h> #include <stdio.h>
#include "d3dcompiler_private.h" @@ -2585,6 +2586,90 @@ static unsigned int index_instructions(struct list *instrs, unsigned int index) return index; }
+/* Walk the chain of derefs and retrieve the actual variable we care about. */ +static struct hlsl_ir_var *hlsl_var_from_deref(const struct hlsl_deref *deref) +{ + switch (deref->type) + { + case HLSL_IR_DEREF_VAR: + return deref->v.var; + case HLSL_IR_DEREF_ARRAY: + return hlsl_var_from_deref(&deref_from_node(deref->v.array.array)->src); + case HLSL_IR_DEREF_RECORD: + return hlsl_var_from_deref(&deref_from_node(deref->v.record.record)->src); + } + assert(0); + return NULL; +} + +/* Compute the earliest and latest liveness for each variable. In the case that + * a variable is accessed inside of a loop, we promote its liveness to extend + * to at least the range of the entire loop. */ +static void compute_liveness_recurse(struct list *instrs, unsigned int loop_first, unsigned int loop_last) +{ + struct hlsl_ir_node *instr; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(instr, instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_ASSIGNMENT: + { + struct hlsl_ir_assignment *assignment = assignment_from_node(instr); + var = hlsl_var_from_deref(&assignment->lhs); + if (!var->first_write) + var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; + break; + } + case HLSL_IR_DEREF: + { + struct hlsl_ir_deref *deref = deref_from_node(instr); + var = hlsl_var_from_deref(&deref->src); + var->last_read = loop_last ? max(instr->index, loop_last) : instr->index; + break; + } + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = if_from_node(instr); + compute_liveness_recurse(iff->then_instrs, loop_first, loop_last); + if (iff->else_instrs) + compute_liveness_recurse(iff->else_instrs, loop_first, loop_last); + break; + } + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = loop_from_node(instr); + compute_liveness_recurse(loop->body, loop_first ? loop_first : instr->index, + loop_last ? loop_last : loop->next_index); + break; + } + default: + break; + } + } +} + +static void compute_liveness(struct hlsl_ir_function_decl *entry_func) +{ + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &hlsl_ctx.globals->vars, struct hlsl_ir_var, scope_entry) + { + var->first_write = 1; + } + + LIST_FOR_EACH_ENTRY(var, entry_func->parameters, struct hlsl_ir_var, param_entry) + { + if (var->modifiers & HLSL_STORAGE_IN) + var->first_write = 1; + if (var->modifiers & HLSL_STORAGE_OUT) + var->last_read = UINT_MAX; + } + + compute_liveness_recurse(entry_func->body, 0, 0); +} + struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD minor, const char *entrypoint, char **messages) { @@ -2641,7 +2726,8 @@ struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD mino goto out; }
- index_instructions(entry_func->body, 1); + /* Index 0 means unused; index 1 means function entry, so start at 2. */ + index_instructions(entry_func->body, 2);
if (TRACE_ON(hlsl_parser)) { @@ -2649,6 +2735,8 @@ struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD mino wine_rb_for_each_entry(&hlsl_ctx.functions, dump_function, NULL); }
+ compute_liveness(entry_func); + out: TRACE("Freeing functions IR.\n"); wine_rb_destroy(&hlsl_ctx.functions, free_function_rb, NULL);
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
@@ -2641,7 +2726,8 @@ struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD mino goto out; }
- index_instructions(entry_func->body, 1);
- /* Index 0 means unused; index 1 means function entry, so start at 2. */
- index_instructions(entry_func->body, 2);
Do we need to reserve a special value for function entry?
On 4/1/20 1:32 PM, Matteo Bruni wrote:
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
@@ -2641,7 +2726,8 @@ struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD mino goto out; }
- index_instructions(entry_func->body, 1);
- /* Index 0 means unused; index 1 means function entry, so start at 2. */
- index_instructions(entry_func->body, 2);
Do we need to reserve a special value for function entry?
An instruction can both consume values and produce them, so a variable/node with liveness 4-5 and another with 5-6 can be allocated to the same register. Hence a variable written at function entry needs to have a first_write index before any reads. This seemed like the most natural way to do that, though I welcome other suggestions.
On Wed, Apr 1, 2020 at 8:41 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 4/1/20 1:32 PM, Matteo Bruni wrote:
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
@@ -2641,7 +2726,8 @@ struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD mino goto out; }
- index_instructions(entry_func->body, 1);
- /* Index 0 means unused; index 1 means function entry, so start at 2. */
- index_instructions(entry_func->body, 2);
Do we need to reserve a special value for function entry?
An instruction can both consume values and produce them, so a variable/node with liveness 4-5 and another with 5-6 can be allocated to the same register. Hence a variable written at function entry needs to have a first_write index before any reads. This seemed like the most natural way to do that, though I welcome other suggestions.
Yep, that's a good reason for reserving 1. Thanks :)
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/d3dcompiler_43/d3dcompiler_private.h | 7 +++- dlls/d3dcompiler_43/hlsl.y | 41 ++++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index e1cdd9a8ba0..df45a7082fe 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -658,10 +658,15 @@ struct hlsl_ir_node { struct list entry; enum hlsl_ir_node_type type; - unsigned int index; /* for liveness ranges */ struct hlsl_type *data_type;
struct source_location loc; + + /* Liveness ranges. "index" is the index of this instruction. Since this is + * essentially an SSA value, the earliest live point is the index. This is + * true even for loops, since currently we can't have a reference to a + * value generated in an earlier iteration of the loop. */ + unsigned int index, last_read; };
#define HLSL_STORAGE_EXTERN 0x00000001 diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index bdf619a9224..312949c5840 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -2604,7 +2604,9 @@ static struct hlsl_ir_var *hlsl_var_from_deref(const struct hlsl_deref *deref)
/* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend - * to at least the range of the entire loop. */ + * to at least the range of the entire loop. Note that we don't need to do this + * for anonymous nodes, since there's currently no way to use a node which was + * calculated in an earlier iteration of the loop. */ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_first, unsigned int loop_last) { struct hlsl_ir_node *instr; @@ -2620,6 +2622,17 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs var = hlsl_var_from_deref(&assignment->lhs); if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; + assignment->rhs->last_read = instr->index; + break; + } + case HLSL_IR_CONSTANT: + break; + case HLSL_IR_CONSTRUCTOR: + { + struct hlsl_ir_constructor *constructor = constructor_from_node(instr); + unsigned int i; + for (i = 0; i < constructor->args_count; ++i) + constructor->args[i]->last_read = instr->index; break; } case HLSL_IR_DEREF: @@ -2627,6 +2640,18 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs struct hlsl_ir_deref *deref = deref_from_node(instr); var = hlsl_var_from_deref(&deref->src); var->last_read = loop_last ? max(instr->index, loop_last) : instr->index; + if (deref->src.type == HLSL_IR_DEREF_ARRAY) + deref->src.v.array.index->last_read = instr->index; + break; + } + case HLSL_IR_EXPR: + { + struct hlsl_ir_expr *expr = expr_from_node(instr); + expr->operands[0]->last_read = instr->index; + if (expr->operands[1]) + expr->operands[1]->last_read = instr->index; + if (expr->operands[2]) + expr->operands[2]->last_read = instr->index; break; } case HLSL_IR_IF: @@ -2635,6 +2660,14 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs compute_liveness_recurse(iff->then_instrs, loop_first, loop_last); if (iff->else_instrs) compute_liveness_recurse(iff->else_instrs, loop_first, loop_last); + iff->condition->last_read = instr->index; + break; + } + case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = jump_from_node(instr); + if (jump->type == HLSL_IR_JUMP_RETURN && jump->return_value) + jump->return_value->last_read = instr->index; break; } case HLSL_IR_LOOP: @@ -2644,6 +2677,12 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs loop_last ? loop_last : loop->next_index); break; } + case HLSL_IR_SWIZZLE: + { + struct hlsl_ir_swizzle *swizzle = swizzle_from_node(instr); + swizzle->val->last_read = instr->index; + break; + } default: break; }
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
case HLSL_IR_CONSTRUCTOR:
{
struct hlsl_ir_constructor *constructor = constructor_from_node(instr);
unsigned int i;
for (i = 0; i < constructor->args_count; ++i)
constructor->args[i]->last_read = instr->index; break; }
Going forward, it's probably easier to add a pass to split up HLSL_IR_CONSTRUCTOR nodes into multiple assignments. One less node type to care about here and in the following passes.
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/d3dcompiler_43/d3dcompiler_private.h | 3 +++ dlls/d3dcompiler_43/hlsl.y | 23 +++++++++++++++++++++++ dlls/d3dcompiler_43/utils.c | 9 +++++++++ 3 files changed, 35 insertions(+)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index df45a7082fe..4fdb464a4ef 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -614,6 +614,7 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy; + unsigned int reg_size; union { struct list *elements; @@ -632,6 +633,7 @@ struct hlsl_struct_field const char *name; const char *semantic; DWORD modifiers; + unsigned int reg_offset; };
struct source_location @@ -1083,6 +1085,7 @@ struct hlsl_type *new_hlsl_type(const char *name, enum hlsl_type_class type_clas struct hlsl_type *new_array_type(struct hlsl_type *basic_type, unsigned int array_size) DECLSPEC_HIDDEN; struct hlsl_type *clone_hlsl_type(struct hlsl_type *old) DECLSPEC_HIDDEN; struct hlsl_type *get_type(struct hlsl_scope *scope, const char *name, BOOL recursive) DECLSPEC_HIDDEN; +BOOL is_row_major(const struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL find_function(const char *name) DECLSPEC_HIDDEN; unsigned int components_count_type(struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL compare_hlsl_types(const struct hlsl_type *t1, const struct hlsl_type *t2) DECLSPEC_HIDDEN; diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index 312949c5840..d6c64edcace 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -717,6 +717,15 @@ static BOOL add_struct_field(struct list *fields, struct hlsl_struct_field *fiel return TRUE; }
+BOOL is_row_major(const struct hlsl_type *type) +{ + if (type->modifiers & HLSL_MODIFIER_ROW_MAJOR) + return TRUE; + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return FALSE; + return hlsl_ctx.matrix_majority == HLSL_ROW_MAJOR; +} + static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *modifiers, struct source_location loc) { struct hlsl_type *new_type; @@ -729,6 +738,9 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *mod
new_type->modifiers = add_modifiers(new_type->modifiers, *modifiers, loc); *modifiers &= ~HLSL_TYPE_MODIFIERS_MASK; + + if (new_type->type == HLSL_CLASS_MATRIX) + new_type->reg_size = is_row_major(new_type) ? new_type->dimy : new_type->dimx; return new_type; }
@@ -774,6 +786,8 @@ static struct list *gen_struct_fields(struct hlsl_type *type, DWORD modifiers, s static struct hlsl_type *new_struct_type(const char *name, struct list *fields) { struct hlsl_type *type = d3dcompiler_alloc(sizeof(*type)); + struct hlsl_struct_field *field; + unsigned int reg_size = 0;
if (!type) { @@ -785,6 +799,13 @@ static struct hlsl_type *new_struct_type(const char *name, struct list *fields) type->dimx = type->dimy = 1; type->e.elements = fields;
+ LIST_FOR_EACH_ENTRY(field, fields, struct hlsl_struct_field, entry) + { + field->reg_offset = reg_size; + reg_size += field->type->reg_size; + } + type->reg_size = reg_size; + list_add_tail(&hlsl_ctx.types, &type->entry);
return type; @@ -813,6 +834,8 @@ static BOOL add_typedef(DWORD modifiers, struct hlsl_type *orig_type, struct lis
if (type->type != HLSL_CLASS_MATRIX) check_invalid_matrix_modifiers(type->modifiers, v->loc); + else + type->reg_size = is_row_major(type) ? type->dimy : type->dimx;
ret = add_type_to_scope(hlsl_ctx.cur_scope, type); if (!ret) diff --git a/dlls/d3dcompiler_43/utils.c b/dlls/d3dcompiler_43/utils.c index 84323c334f8..94f7ee59e75 100644 --- a/dlls/d3dcompiler_43/utils.c +++ b/dlls/d3dcompiler_43/utils.c @@ -820,6 +820,10 @@ struct hlsl_type *new_hlsl_type(const char *name, enum hlsl_type_class type_clas type->base_type = base_type; type->dimx = dimx; type->dimy = dimy; + if (type_class == HLSL_CLASS_MATRIX) + type->reg_size = is_row_major(type) ? dimy : dimx; + else + type->reg_size = 1;
list_add_tail(&hlsl_ctx.types, &type->entry);
@@ -836,6 +840,7 @@ struct hlsl_type *new_array_type(struct hlsl_type *basic_type, unsigned int arra type->modifiers = basic_type->modifiers; type->e.array.elements_count = array_size; type->e.array.type = basic_type; + type->reg_size = basic_type->reg_size * array_size; return type; }
@@ -953,6 +958,7 @@ struct hlsl_type *clone_hlsl_type(struct hlsl_type *old) type->dimy = old->dimy; type->modifiers = old->modifiers; type->sampler_dim = old->sampler_dim; + type->reg_size = old->reg_size; switch (old->type) { case HLSL_CLASS_ARRAY: @@ -989,6 +995,9 @@ struct hlsl_type *clone_hlsl_type(struct hlsl_type *old) if (old_field->semantic) field->semantic = d3dcompiler_strdup(old_field->semantic); field->modifiers = old_field->modifiers; + /* Fix up the reg_size if we added a majority modifier. */ + if (field->type->type == HLSL_CLASS_MATRIX) + field->type->reg_size = is_row_major(field->type) ? field->type->dimy : field->type->dimx; list_add_tail(type->e.elements, &field->entry); } break;
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
dlls/d3dcompiler_43/d3dcompiler_private.h | 3 +++ dlls/d3dcompiler_43/hlsl.y | 23 +++++++++++++++++++++++ dlls/d3dcompiler_43/utils.c | 9 +++++++++ 3 files changed, 35 insertions(+)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index df45a7082fe..4fdb464a4ef 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -614,6 +614,7 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy;
- unsigned int reg_size; union { struct list *elements;
I'm not too thrilled to have the size stored in struct hlsl_type. I guess it would be very awkward otherwise though...
@@ -632,6 +633,7 @@ struct hlsl_struct_field const char *name; const char *semantic; DWORD modifiers;
- unsigned int reg_offset;
};
struct source_location @@ -1083,6 +1085,7 @@ struct hlsl_type *new_hlsl_type(const char *name, enum hlsl_type_class type_clas struct hlsl_type *new_array_type(struct hlsl_type *basic_type, unsigned int array_size) DECLSPEC_HIDDEN; struct hlsl_type *clone_hlsl_type(struct hlsl_type *old) DECLSPEC_HIDDEN; struct hlsl_type *get_type(struct hlsl_scope *scope, const char *name, BOOL recursive) DECLSPEC_HIDDEN; +BOOL is_row_major(const struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL find_function(const char *name) DECLSPEC_HIDDEN; unsigned int components_count_type(struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL compare_hlsl_types(const struct hlsl_type *t1, const struct hlsl_type *t2) DECLSPEC_HIDDEN; diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index 312949c5840..d6c64edcace 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -717,6 +717,15 @@ static BOOL add_struct_field(struct list *fields, struct hlsl_struct_field *fiel return TRUE; }
+BOOL is_row_major(const struct hlsl_type *type) +{
- if (type->modifiers & HLSL_MODIFIER_ROW_MAJOR)
return TRUE;
- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
return FALSE;
- return hlsl_ctx.matrix_majority == HLSL_ROW_MAJOR;
+}
static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *modifiers, struct source_location loc) { struct hlsl_type *new_type; @@ -729,6 +738,9 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *mod
new_type->modifiers = add_modifiers(new_type->modifiers, *modifiers, loc); *modifiers &= ~HLSL_TYPE_MODIFIERS_MASK;
- if (new_type->type == HLSL_CLASS_MATRIX)
return new_type;new_type->reg_size = is_row_major(new_type) ? new_type->dimy : new_type->dimx;
}
I think we should have one of the "majority" modifiers always stored in the hlsl_type for matrices, since it's actually a significant distinction. Notice that the default matrix majority can be changed over the course of a shader via a #pragma.
On 4/1/20 1:34 PM, Matteo Bruni wrote:
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
dlls/d3dcompiler_43/d3dcompiler_private.h | 3 +++ dlls/d3dcompiler_43/hlsl.y | 23 +++++++++++++++++++++++ dlls/d3dcompiler_43/utils.c | 9 +++++++++ 3 files changed, 35 insertions(+)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index df45a7082fe..4fdb464a4ef 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -614,6 +614,7 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy;
- unsigned int reg_size; union { struct list *elements;
I'm not too thrilled to have the size stored in struct hlsl_type. I guess it would be very awkward otherwise though...
If you think this is bad, wait until you see what I have next :D
(Namely, I have patches to access the hlsl_type structure from the "bwriter" layer, and also to store type offsets in the DXBC blob. Because otherwise writing structure types involves a lot of pointless duplication, both in terms of HLSL code and generated DXBC, and honestly widl does the same thing and it works. If it helps, we could rename it to something other than hlsl_type...)
I guess the alternative here is to have something like "unsigned int hlsl_type_get_reg_size(const struct hlsl_type *type)". Not even that ugly, but obviously means more overhead, and I imagine avoiding that overhead is a worthwhile goal for a compiler...
@@ -632,6 +633,7 @@ struct hlsl_struct_field const char *name; const char *semantic; DWORD modifiers;
unsigned int reg_offset; };
struct source_location
@@ -1083,6 +1085,7 @@ struct hlsl_type *new_hlsl_type(const char *name, enum hlsl_type_class type_clas struct hlsl_type *new_array_type(struct hlsl_type *basic_type, unsigned int array_size) DECLSPEC_HIDDEN; struct hlsl_type *clone_hlsl_type(struct hlsl_type *old) DECLSPEC_HIDDEN; struct hlsl_type *get_type(struct hlsl_scope *scope, const char *name, BOOL recursive) DECLSPEC_HIDDEN; +BOOL is_row_major(const struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL find_function(const char *name) DECLSPEC_HIDDEN; unsigned int components_count_type(struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL compare_hlsl_types(const struct hlsl_type *t1, const struct hlsl_type *t2) DECLSPEC_HIDDEN; diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index 312949c5840..d6c64edcace 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -717,6 +717,15 @@ static BOOL add_struct_field(struct list *fields, struct hlsl_struct_field *fiel return TRUE; }
+BOOL is_row_major(const struct hlsl_type *type) +{
- if (type->modifiers & HLSL_MODIFIER_ROW_MAJOR)
return TRUE;
- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
return FALSE;
- return hlsl_ctx.matrix_majority == HLSL_ROW_MAJOR;
+}
- static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *modifiers, struct source_location loc) { struct hlsl_type *new_type;
@@ -729,6 +738,9 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *mod
new_type->modifiers = add_modifiers(new_type->modifiers, *modifiers, loc); *modifiers &= ~HLSL_TYPE_MODIFIERS_MASK;
- if (new_type->type == HLSL_CLASS_MATRIX)
}new_type->reg_size = is_row_major(new_type) ? new_type->dimy : new_type->dimx; return new_type;
I think we should have one of the "majority" modifiers always stored in the hlsl_type for matrices, since it's actually a significant distinction. Notice that the default matrix majority can be changed over the course of a shader via a #pragma.
The reason I don't particularly want to do this is constructions like:
typedef float3x3 matrix_t; typedef row_major matrix_t row_matrix_t;
I didn't realize that the default majority could change, though. Maybe we need to store the default majority as a separate field in hlsl_type.
On Wed, Apr 1, 2020 at 9:00 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 4/1/20 1:34 PM, Matteo Bruni wrote:
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
dlls/d3dcompiler_43/d3dcompiler_private.h | 3 +++ dlls/d3dcompiler_43/hlsl.y | 23 +++++++++++++++++++++++ dlls/d3dcompiler_43/utils.c | 9 +++++++++ 3 files changed, 35 insertions(+)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index df45a7082fe..4fdb464a4ef 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -614,6 +614,7 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy;
- unsigned int reg_size; union { struct list *elements;
I'm not too thrilled to have the size stored in struct hlsl_type. I guess it would be very awkward otherwise though...
If you think this is bad, wait until you see what I have next :D
(Namely, I have patches to access the hlsl_type structure from the "bwriter" layer, and also to store type offsets in the DXBC blob. Because otherwise writing structure types involves a lot of pointless duplication, both in terms of HLSL code and generated DXBC, and honestly widl does the same thing and it works. If it helps, we could rename it to something other than hlsl_type...)
Sure, using struct hlsl_type throughout the stack is not a problem. hlsl_type is a bit of a bad name regardless. The point about "it works for widl" isn't impressing me much though :D
WRT DXBC offsets, you could have a struct bwriter_type (or other name) that points to the relevant hlsl_type and has the additional stuff you need for generating bytecode. It should be a bit nicer than sticking everything in hlsl_type and hopefully not as painful as fully duplicating everything (which I agree isn't helpful). It depends how much bwriter-specific stuff you're going to need.
I guess the alternative here is to have something like "unsigned int hlsl_type_get_reg_size(const struct hlsl_type *type)". Not even that ugly, but obviously means more overhead, and I imagine avoiding that overhead is a worthwhile goal for a compiler...
Yeah, I guess storing the size in there is okay. I'll see how my body will react to what you have in store next :D
@@ -632,6 +633,7 @@ struct hlsl_struct_field const char *name; const char *semantic; DWORD modifiers;
unsigned int reg_offset; };
struct source_location
@@ -1083,6 +1085,7 @@ struct hlsl_type *new_hlsl_type(const char *name, enum hlsl_type_class type_clas struct hlsl_type *new_array_type(struct hlsl_type *basic_type, unsigned int array_size) DECLSPEC_HIDDEN; struct hlsl_type *clone_hlsl_type(struct hlsl_type *old) DECLSPEC_HIDDEN; struct hlsl_type *get_type(struct hlsl_scope *scope, const char *name, BOOL recursive) DECLSPEC_HIDDEN; +BOOL is_row_major(const struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL find_function(const char *name) DECLSPEC_HIDDEN; unsigned int components_count_type(struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL compare_hlsl_types(const struct hlsl_type *t1, const struct hlsl_type *t2) DECLSPEC_HIDDEN; diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index 312949c5840..d6c64edcace 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -717,6 +717,15 @@ static BOOL add_struct_field(struct list *fields, struct hlsl_struct_field *fiel return TRUE; }
+BOOL is_row_major(const struct hlsl_type *type) +{
- if (type->modifiers & HLSL_MODIFIER_ROW_MAJOR)
return TRUE;
- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
return FALSE;
- return hlsl_ctx.matrix_majority == HLSL_ROW_MAJOR;
+}
- static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *modifiers, struct source_location loc) { struct hlsl_type *new_type;
@@ -729,6 +738,9 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *mod
new_type->modifiers = add_modifiers(new_type->modifiers, *modifiers, loc); *modifiers &= ~HLSL_TYPE_MODIFIERS_MASK;
- if (new_type->type == HLSL_CLASS_MATRIX)
}new_type->reg_size = is_row_major(new_type) ? new_type->dimy : new_type->dimx; return new_type;
I think we should have one of the "majority" modifiers always stored in the hlsl_type for matrices, since it's actually a significant distinction. Notice that the default matrix majority can be changed over the course of a shader via a #pragma.
The reason I don't particularly want to do this is constructions like:
typedef float3x3 matrix_t; typedef row_major matrix_t row_matrix_t;
I didn't realize that the default majority could change, though. Maybe we need to store the default majority as a separate field in hlsl_type.
I don't think it matters a lot if it's stored in a separate field or together with the other modifiers. This is a bit of a special case regardless, in that you need to override the existing majority (rather than complaining that the other majority is currently set) in some cases. Certainly fine to change things if it helps though.
And yeah, tests do have room for improvement...
On 4/1/20 2:33 PM, Matteo Bruni wrote:
On Wed, Apr 1, 2020 at 9:00 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 4/1/20 1:34 PM, Matteo Bruni wrote:
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
dlls/d3dcompiler_43/d3dcompiler_private.h | 3 +++ dlls/d3dcompiler_43/hlsl.y | 23 +++++++++++++++++++++++ dlls/d3dcompiler_43/utils.c | 9 +++++++++ 3 files changed, 35 insertions(+)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index df45a7082fe..4fdb464a4ef 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -614,6 +614,7 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy;
- unsigned int reg_size; union { struct list *elements;
I'm not too thrilled to have the size stored in struct hlsl_type. I guess it would be very awkward otherwise though...
If you think this is bad, wait until you see what I have next :D
(Namely, I have patches to access the hlsl_type structure from the "bwriter" layer, and also to store type offsets in the DXBC blob. Because otherwise writing structure types involves a lot of pointless duplication, both in terms of HLSL code and generated DXBC, and honestly widl does the same thing and it works. If it helps, we could rename it to something other than hlsl_type...)
Sure, using struct hlsl_type throughout the stack is not a problem. hlsl_type is a bit of a bad name regardless. The point about "it works for widl" isn't impressing me much though :D
WRT DXBC offsets, you could have a struct bwriter_type (or other name) that points to the relevant hlsl_type and has the additional stuff you need for generating bytecode. It should be a bit nicer than sticking everything in hlsl_type and hopefully not as painful as fully duplicating everything (which I agree isn't helpful). It depends how much bwriter-specific stuff you're going to need.
Sure, that's definitely better than doing a whole translation. I'm mostly inclined to say it's not so much prettier that it's worth the extra allocation, but that's just me.
I think the only thing I needed the bwriter layer to store is the type offset (including for sm4 reflection).
I guess the alternative here is to have something like "unsigned int hlsl_type_get_reg_size(const struct hlsl_type *type)". Not even that ugly, but obviously means more overhead, and I imagine avoiding that overhead is a worthwhile goal for a compiler...
Yeah, I guess storing the size in there is okay. I'll see how my body will react to what you have in store next :D
@@ -632,6 +633,7 @@ struct hlsl_struct_field const char *name; const char *semantic; DWORD modifiers;
unsigned int reg_offset; };
struct source_location
@@ -1083,6 +1085,7 @@ struct hlsl_type *new_hlsl_type(const char *name, enum hlsl_type_class type_clas struct hlsl_type *new_array_type(struct hlsl_type *basic_type, unsigned int array_size) DECLSPEC_HIDDEN; struct hlsl_type *clone_hlsl_type(struct hlsl_type *old) DECLSPEC_HIDDEN; struct hlsl_type *get_type(struct hlsl_scope *scope, const char *name, BOOL recursive) DECLSPEC_HIDDEN; +BOOL is_row_major(const struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL find_function(const char *name) DECLSPEC_HIDDEN; unsigned int components_count_type(struct hlsl_type *type) DECLSPEC_HIDDEN; BOOL compare_hlsl_types(const struct hlsl_type *t1, const struct hlsl_type *t2) DECLSPEC_HIDDEN; diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index 312949c5840..d6c64edcace 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -717,6 +717,15 @@ static BOOL add_struct_field(struct list *fields, struct hlsl_struct_field *fiel return TRUE; }
+BOOL is_row_major(const struct hlsl_type *type) +{
- if (type->modifiers & HLSL_MODIFIER_ROW_MAJOR)
return TRUE;
- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
return FALSE;
- return hlsl_ctx.matrix_majority == HLSL_ROW_MAJOR;
+}
- static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *modifiers, struct source_location loc) { struct hlsl_type *new_type;
@@ -729,6 +738,9 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_type *type, DWORD *mod
new_type->modifiers = add_modifiers(new_type->modifiers, *modifiers, loc); *modifiers &= ~HLSL_TYPE_MODIFIERS_MASK;
- if (new_type->type == HLSL_CLASS_MATRIX)
}new_type->reg_size = is_row_major(new_type) ? new_type->dimy : new_type->dimx; return new_type;
I think we should have one of the "majority" modifiers always stored in the hlsl_type for matrices, since it's actually a significant distinction. Notice that the default matrix majority can be changed over the course of a shader via a #pragma.
The reason I don't particularly want to do this is constructions like:
typedef float3x3 matrix_t; typedef row_major matrix_t row_matrix_t;
I didn't realize that the default majority could change, though. Maybe we need to store the default majority as a separate field in hlsl_type.
I don't think it matters a lot if it's stored in a separate field or together with the other modifiers. This is a bit of a special case regardless, in that you need to override the existing majority (rather than complaining that the other majority is currently set) in some cases. Certainly fine to change things if it helps though.
And yeah, tests do have room for improvement...
Mostly the trick is that a subsequent declaration "typedef column_major row_matrix_t col_matrix_t" throws up an error.
Anyway, I'll add some extra tests.
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/d3dcompiler_43/d3dcompiler_private.h | 11 +- dlls/d3dcompiler_43/hlsl.y | 172 ++++++++++++++++++++++ dlls/d3dcompiler_43/utils.c | 2 +- 3 files changed, 183 insertions(+), 2 deletions(-)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index 4fdb464a4ef..784a1ae1136 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -150,7 +150,7 @@ static inline void *d3dcompiler_realloc(void *ptr, SIZE_T size) { if (!ptr) return d3dcompiler_alloc(size); - return HeapReAlloc(GetProcessHeap(), 0, ptr, size); + return HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, size); }
static inline BOOL d3dcompiler_free(void *ptr) @@ -643,6 +643,13 @@ struct source_location unsigned int col; };
+struct hlsl_reg +{ + unsigned int reg; + unsigned char writemask; + unsigned char allocated; +}; + enum hlsl_ir_node_type { HLSL_IR_ASSIGNMENT = 0, @@ -708,6 +715,7 @@ struct hlsl_ir_var struct list scope_entry, param_entry;
unsigned int first_write, last_read; + struct hlsl_reg reg; };
struct hlsl_ir_function @@ -1113,6 +1121,7 @@ const char *debug_base_type(const struct hlsl_type *type) DECLSPEC_HIDDEN; const char *debug_hlsl_type(const struct hlsl_type *type) DECLSPEC_HIDDEN; const char *debug_modifiers(DWORD modifiers) DECLSPEC_HIDDEN; const char *debug_node_type(enum hlsl_ir_node_type type) DECLSPEC_HIDDEN; +const char *debug_writemask(DWORD writemask) DECLSPEC_HIDDEN; void debug_dump_ir_function_decl(const struct hlsl_ir_function_decl *func) DECLSPEC_HIDDEN;
void free_hlsl_type(struct hlsl_type *type) DECLSPEC_HIDDEN; diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index d6c64edcace..f828e05e335 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -2732,6 +2732,177 @@ static void compute_liveness(struct hlsl_ir_function_decl *entry_func) compute_liveness_recurse(entry_func->body, 0, 0); }
+struct liveness_ctx +{ + size_t count; + struct + { + /* 0 if not live yet. */ + unsigned int last_read; + } *regs; +}; + +static unsigned char get_dead_writemask(struct liveness_ctx *liveness, + unsigned int first_write, unsigned int index, unsigned int components) +{ + unsigned char i, writemask = 0, count = 0; + + for (i = 0; i < 4; ++i) + { + if (liveness->regs[index + i].last_read <= first_write) + { + writemask |= 1 << i; + if (++count == components) + return writemask; + } + } + + return 0; +} + +static struct hlsl_reg allocate_temp_register(struct liveness_ctx *liveness, + unsigned int first_write, unsigned int last_read, unsigned char components) +{ + struct hlsl_reg ret = {.allocated = TRUE}; + unsigned char writemask, i; + unsigned int regnum; + + for (regnum = 0; regnum < liveness->count; regnum += 4) + { + if ((writemask = get_dead_writemask(liveness, first_write, regnum, components))) + break; + } + if (regnum == liveness->count) + { + liveness->count = max(liveness->count * 2, 32); + liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs)); + writemask = (1 << components) - 1; + } + for (i = 0; i < 4; ++i) + { + if (writemask & (1 << i)) + liveness->regs[regnum + i].last_read = last_read; + } + ret.reg = regnum / 4; + ret.writemask = writemask; + return ret; +} + +static BOOL is_range_dead(struct liveness_ctx *liveness, unsigned int first_write, + unsigned int index, unsigned int elements) +{ + unsigned int i; + + for (i = 0; i < elements; i += 4) + { + if (!get_dead_writemask(liveness, first_write, index + i, 4)) + return FALSE; + } + return TRUE; +} + +/* "elements" is the total number of consecutive whole registers needed. */ +static struct hlsl_reg allocate_temp_range(struct liveness_ctx *liveness, + unsigned int first_write, unsigned int last_read, unsigned int elements) +{ + struct hlsl_reg ret = {.allocated = TRUE}; + unsigned int i, regnum; + + elements *= 4; + + for (regnum = 0; regnum < liveness->count; regnum += 4) + { + if (is_range_dead(liveness, first_write, regnum, min(elements, liveness->count - regnum))) + break; + } + if (regnum + elements >= liveness->count) + { + liveness->count = max(liveness->count * 2, regnum + elements); + liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs)); + } + for (i = 0; i < elements; ++i) + liveness->regs[regnum + i].last_read = last_read; + ret.reg = regnum / 4; + return ret; +} + +static void allocate_variable_temp_register(struct hlsl_ir_var *var, struct liveness_ctx *liveness) +{ + if (!var->reg.allocated && var->last_read) + { + if (var->data_type->reg_size > 1) + { + var->reg = allocate_temp_range(liveness, var->first_write, + var->last_read, var->data_type->reg_size); + TRACE("Allocated r%u-r%u to %s (liveness %u-%u).\n", var->reg.reg, + var->reg.reg + var->data_type->reg_size - 1, var->name, var->first_write, var->last_read); + } + else + { + var->reg = allocate_temp_register(liveness, var->first_write, + var->last_read, var->data_type->dimx); + TRACE("Allocated r%u%s to %s (liveness %u-%u).\n", var->reg.reg, + debug_writemask(var->reg.writemask), var->name, var->first_write, var->last_read); + } + } +} + +static void allocate_temp_registers_recurse(struct list *instrs, struct liveness_ctx *liveness) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_ASSIGNMENT: + { + struct hlsl_ir_assignment *assignment = assignment_from_node(instr); + allocate_variable_temp_register(hlsl_var_from_deref(&assignment->lhs), liveness); + break; + } + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = if_from_node(instr); + allocate_temp_registers_recurse(iff->then_instrs, liveness); + if (iff->else_instrs) + allocate_temp_registers_recurse(iff->else_instrs, liveness); + break; + } + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = loop_from_node(instr); + allocate_temp_registers_recurse(loop->body, liveness); + break; + } + default: + break; + } + } +} + +/* Simple greedy temporary register allocation pass that just assigns a unique + * index to all (simultaneously live) variables or intermediate values. Agnostic + * as to how many registers are actually available for the current backend, and + * does not handle constants. */ +static void allocate_temp_registers(struct hlsl_ir_function_decl *entry_func) +{ + struct liveness_ctx liveness = {}; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &hlsl_ctx.globals->vars, struct hlsl_ir_var, scope_entry) + { + allocate_variable_temp_register(var, &liveness); + } + + LIST_FOR_EACH_ENTRY(var, entry_func->parameters, struct hlsl_ir_var, param_entry) + { + allocate_variable_temp_register(var, &liveness); + } + + allocate_temp_registers_recurse(entry_func->body, &liveness); +} + struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD minor, const char *entrypoint, char **messages) { @@ -2798,6 +2969,7 @@ struct bwriter_shader *parse_hlsl(enum shader_type type, DWORD major, DWORD mino }
compute_liveness(entry_func); + allocate_temp_registers(entry_func);
out: TRACE("Freeing functions IR.\n"); diff --git a/dlls/d3dcompiler_43/utils.c b/dlls/d3dcompiler_43/utils.c index 94f7ee59e75..af625d6ac6f 100644 --- a/dlls/d3dcompiler_43/utils.c +++ b/dlls/d3dcompiler_43/utils.c @@ -2069,7 +2069,7 @@ static void debug_dump_ir_constructor(const struct hlsl_ir_constructor *construc wine_dbg_printf(")"); }
-static const char *debug_writemask(DWORD writemask) +const char *debug_writemask(DWORD writemask) { static const char components[] = {'x', 'y', 'z', 'w'}; char string[5];
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
Hi Zeb,
I have a number of questions / comments, inline.
diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index d6c64edcace..f828e05e335 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -2732,6 +2732,177 @@ static void compute_liveness(struct hlsl_ir_function_decl *entry_func) compute_liveness_recurse(entry_func->body, 0, 0); }
+struct liveness_ctx +{
- size_t count;
- struct
- {
/* 0 if not live yet. */
unsigned int last_read;
- } *regs;
+};
+static unsigned char get_dead_writemask(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int index, unsigned int components)
Maybe get_available_writemask()? Same for the other similarly named functions.
+{
- unsigned char i, writemask = 0, count = 0;
- for (i = 0; i < 4; ++i)
- {
if (liveness->regs[index + i].last_read <= first_write)
{
writemask |= 1 << i;
if (++count == components)
return writemask;
}
- }
- return 0;
+}
+static struct hlsl_reg allocate_temp_register(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int last_read, unsigned char components)
How is this going to be different to the non-temp register allocation?
+{
- struct hlsl_reg ret = {.allocated = TRUE};
- unsigned char writemask, i;
- unsigned int regnum;
- for (regnum = 0; regnum < liveness->count; regnum += 4)
- {
if ((writemask = get_dead_writemask(liveness, first_write, regnum, components)))
break;
- }
- if (regnum == liveness->count)
- {
liveness->count = max(liveness->count * 2, 32);
liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs));
Do we want to use array_reserve() here?
writemask = (1 << components) - 1;
- }
- for (i = 0; i < 4; ++i)
- {
if (writemask & (1 << i))
liveness->regs[regnum + i].last_read = last_read;
- }
- ret.reg = regnum / 4;
- ret.writemask = writemask;
- return ret;
+}
+static BOOL is_range_dead(struct liveness_ctx *liveness, unsigned int first_write,
unsigned int index, unsigned int elements)
+{
- unsigned int i;
- for (i = 0; i < elements; i += 4)
- {
if (!get_dead_writemask(liveness, first_write, index + i, 4))
return FALSE;
- }
- return TRUE;
+}
+/* "elements" is the total number of consecutive whole registers needed. */ +static struct hlsl_reg allocate_temp_range(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int last_read, unsigned int elements)
+{
- struct hlsl_reg ret = {.allocated = TRUE};
- unsigned int i, regnum;
- elements *= 4;
- for (regnum = 0; regnum < liveness->count; regnum += 4)
- {
if (is_range_dead(liveness, first_write, regnum, min(elements, liveness->count - regnum)))
break;
- }
- if (regnum + elements >= liveness->count)
- {
liveness->count = max(liveness->count * 2, regnum + elements);
liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs));
- }
- for (i = 0; i < elements; ++i)
liveness->regs[regnum + i].last_read = last_read;
- ret.reg = regnum / 4;
- return ret;
It feels like the for should execute elements * 4 iterations, instead.
+}
+static void allocate_variable_temp_register(struct hlsl_ir_var *var, struct liveness_ctx *liveness) +{
- if (!var->reg.allocated && var->last_read)
- {
if (var->data_type->reg_size > 1)
{
var->reg = allocate_temp_range(liveness, var->first_write,
var->last_read, var->data_type->reg_size);
TRACE("Allocated r%u-r%u to %s (liveness %u-%u).\n", var->reg.reg,
var->reg.reg + var->data_type->reg_size - 1, var->name, var->first_write, var->last_read);
}
else
{
var->reg = allocate_temp_register(liveness, var->first_write,
var->last_read, var->data_type->dimx);
TRACE("Allocated r%u%s to %s (liveness %u-%u).\n", var->reg.reg,
debug_writemask(var->reg.writemask), var->name, var->first_write, var->last_read);
}
- }
+}
+static void allocate_temp_registers_recurse(struct list *instrs, struct liveness_ctx *liveness) +{
- struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, instrs, struct hlsl_ir_node, entry)
- {
switch (instr->type)
{
case HLSL_IR_ASSIGNMENT:
{
struct hlsl_ir_assignment *assignment = assignment_from_node(instr);
allocate_variable_temp_register(hlsl_var_from_deref(&assignment->lhs), liveness);
break;
}
case HLSL_IR_IF:
{
struct hlsl_ir_if *iff = if_from_node(instr);
allocate_temp_registers_recurse(iff->then_instrs, liveness);
if (iff->else_instrs)
allocate_temp_registers_recurse(iff->else_instrs, liveness);
break;
}
case HLSL_IR_LOOP:
{
struct hlsl_ir_loop *loop = loop_from_node(instr);
allocate_temp_registers_recurse(loop->body, liveness);
break;
}
default:
break;
}
- }
+}
Do we need to allocate temporary registers for other instructions too, like expressions?
+/* Simple greedy temporary register allocation pass that just assigns a unique
- index to all (simultaneously live) variables or intermediate values. Agnostic
- as to how many registers are actually available for the current backend, and
- does not handle constants. */
+static void allocate_temp_registers(struct hlsl_ir_function_decl *entry_func) +{
- struct liveness_ctx liveness = {};
I'm pretty sure that this is a gcc extension: you want '{0}' instead.
On 4/1/20 1:37 PM, Matteo Bruni wrote:
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
Hi Zeb,
I have a number of questions / comments, inline.
diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index d6c64edcace..f828e05e335 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -2732,6 +2732,177 @@ static void compute_liveness(struct hlsl_ir_function_decl *entry_func) compute_liveness_recurse(entry_func->body, 0, 0); }
+struct liveness_ctx +{
- size_t count;
- struct
- {
/* 0 if not live yet. */
unsigned int last_read;
- } *regs;
+};
+static unsigned char get_dead_writemask(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int index, unsigned int components)
Maybe get_available_writemask()? Same for the other similarly named functions.
Sure.
+{
- unsigned char i, writemask = 0, count = 0;
- for (i = 0; i < 4; ++i)
- {
if (liveness->regs[index + i].last_read <= first_write)
{
writemask |= 1 << i;
if (++count == components)
return writemask;
}
- }
- return 0;
+}
+static struct hlsl_reg allocate_temp_register(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int last_read, unsigned char components)
How is this going to be different to the non-temp register allocation?
The main difference is with uniforms (and anonymous constants, in sm1), since they're essentially live from program entry.
That said, after rereading my uniform allocation path, I'm not sure why I did write it any differently. The only real reason would seem to be to avoid looping through the entire register array every time, but I didn't even bother making that optimization...
+{
- struct hlsl_reg ret = {.allocated = TRUE};
- unsigned char writemask, i;
- unsigned int regnum;
- for (regnum = 0; regnum < liveness->count; regnum += 4)
- {
if ((writemask = get_dead_writemask(liveness, first_write, regnum, components)))
break;
- }
- if (regnum == liveness->count)
- {
liveness->count = max(liveness->count * 2, 32);
liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs));
Do we want to use array_reserve() here?
Yes, definitely. I wrote this code before adding that, and forgot about it...
writemask = (1 << components) - 1;
- }
- for (i = 0; i < 4; ++i)
- {
if (writemask & (1 << i))
liveness->regs[regnum + i].last_read = last_read;
- }
- ret.reg = regnum / 4;
- ret.writemask = writemask;
- return ret;
+}
+static BOOL is_range_dead(struct liveness_ctx *liveness, unsigned int first_write,
unsigned int index, unsigned int elements)
+{
- unsigned int i;
- for (i = 0; i < elements; i += 4)
- {
if (!get_dead_writemask(liveness, first_write, index + i, 4))
return FALSE;
- }
- return TRUE;
+}
+/* "elements" is the total number of consecutive whole registers needed. */ +static struct hlsl_reg allocate_temp_range(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int last_read, unsigned int elements)
+{
- struct hlsl_reg ret = {.allocated = TRUE};
- unsigned int i, regnum;
- elements *= 4;
- for (regnum = 0; regnum < liveness->count; regnum += 4)
- {
if (is_range_dead(liveness, first_write, regnum, min(elements, liveness->count - regnum)))
break;
- }
- if (regnum + elements >= liveness->count)
- {
liveness->count = max(liveness->count * 2, regnum + elements);
liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs));
- }
- for (i = 0; i < elements; ++i)
liveness->regs[regnum + i].last_read = last_read;
- ret.reg = regnum / 4;
- return ret;
It feels like the for should execute elements * 4 iterations, instead.
It does; "elements *= 4" above. Maybe explicitly multiplying by 4 every time would be clearer?
+}
+static void allocate_variable_temp_register(struct hlsl_ir_var *var, struct liveness_ctx *liveness) +{
- if (!var->reg.allocated && var->last_read)
- {
if (var->data_type->reg_size > 1)
{
var->reg = allocate_temp_range(liveness, var->first_write,
var->last_read, var->data_type->reg_size);
TRACE("Allocated r%u-r%u to %s (liveness %u-%u).\n", var->reg.reg,
var->reg.reg + var->data_type->reg_size - 1, var->name, var->first_write, var->last_read);
}
else
{
var->reg = allocate_temp_register(liveness, var->first_write,
var->last_read, var->data_type->dimx);
TRACE("Allocated r%u%s to %s (liveness %u-%u).\n", var->reg.reg,
debug_writemask(var->reg.writemask), var->name, var->first_write, var->last_read);
}
- }
+}
+static void allocate_temp_registers_recurse(struct list *instrs, struct liveness_ctx *liveness) +{
- struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, instrs, struct hlsl_ir_node, entry)
- {
switch (instr->type)
{
case HLSL_IR_ASSIGNMENT:
{
struct hlsl_ir_assignment *assignment = assignment_from_node(instr);
allocate_variable_temp_register(hlsl_var_from_deref(&assignment->lhs), liveness);
break;
}
case HLSL_IR_IF:
{
struct hlsl_ir_if *iff = if_from_node(instr);
allocate_temp_registers_recurse(iff->then_instrs, liveness);
if (iff->else_instrs)
allocate_temp_registers_recurse(iff->else_instrs, liveness);
break;
}
case HLSL_IR_LOOP:
{
struct hlsl_ir_loop *loop = loop_from_node(instr);
allocate_temp_registers_recurse(loop->body, liveness);
break;
}
default:
break;
}
- }
+}
Do we need to allocate temporary registers for other instructions too, like expressions?
Yes, that's in a separate patch. I was torn between "submit everything related to RA at once so there's enough context" and "keep patch set sizes reviewable" :-/
+/* Simple greedy temporary register allocation pass that just assigns a unique
- index to all (simultaneously live) variables or intermediate values. Agnostic
- as to how many registers are actually available for the current backend, and
- does not handle constants. */
+static void allocate_temp_registers(struct hlsl_ir_function_decl *entry_func) +{
- struct liveness_ctx liveness = {};
I'm pretty sure that this is a gcc extension: you want '{0}' instead.
Huh, indeed it is. I'll avoid that in the future.
On Wed, Apr 1, 2020 at 9:20 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 4/1/20 1:37 PM, Matteo Bruni wrote:
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
Hi Zeb,
I have a number of questions / comments, inline.
diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index d6c64edcace..f828e05e335 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -2732,6 +2732,177 @@ static void compute_liveness(struct hlsl_ir_function_decl *entry_func) compute_liveness_recurse(entry_func->body, 0, 0); }
+struct liveness_ctx +{
- size_t count;
- struct
- {
/* 0 if not live yet. */
unsigned int last_read;
- } *regs;
+};
+static unsigned char get_dead_writemask(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int index, unsigned int components)
Maybe get_available_writemask()? Same for the other similarly named functions.
Sure.
+{
- unsigned char i, writemask = 0, count = 0;
- for (i = 0; i < 4; ++i)
- {
if (liveness->regs[index + i].last_read <= first_write)
{
writemask |= 1 << i;
if (++count == components)
return writemask;
}
- }
- return 0;
+}
+static struct hlsl_reg allocate_temp_register(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int last_read, unsigned char components)
How is this going to be different to the non-temp register allocation?
The main difference is with uniforms (and anonymous constants, in sm1), since they're essentially live from program entry.
That said, after rereading my uniform allocation path, I'm not sure why I did write it any differently. The only real reason would seem to be to avoid looping through the entire register array every time, but I didn't even bother making that optimization...
+{
- struct hlsl_reg ret = {.allocated = TRUE};
- unsigned char writemask, i;
- unsigned int regnum;
- for (regnum = 0; regnum < liveness->count; regnum += 4)
- {
if ((writemask = get_dead_writemask(liveness, first_write, regnum, components)))
break;
- }
- if (regnum == liveness->count)
- {
liveness->count = max(liveness->count * 2, 32);
liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs));
Do we want to use array_reserve() here?
Yes, definitely. I wrote this code before adding that, and forgot about it...
writemask = (1 << components) - 1;
- }
- for (i = 0; i < 4; ++i)
- {
if (writemask & (1 << i))
liveness->regs[regnum + i].last_read = last_read;
- }
- ret.reg = regnum / 4;
- ret.writemask = writemask;
- return ret;
+}
+static BOOL is_range_dead(struct liveness_ctx *liveness, unsigned int first_write,
unsigned int index, unsigned int elements)
+{
- unsigned int i;
- for (i = 0; i < elements; i += 4)
- {
if (!get_dead_writemask(liveness, first_write, index + i, 4))
return FALSE;
- }
- return TRUE;
+}
+/* "elements" is the total number of consecutive whole registers needed. */ +static struct hlsl_reg allocate_temp_range(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int last_read, unsigned int elements)
+{
- struct hlsl_reg ret = {.allocated = TRUE};
- unsigned int i, regnum;
- elements *= 4;
- for (regnum = 0; regnum < liveness->count; regnum += 4)
- {
if (is_range_dead(liveness, first_write, regnum, min(elements, liveness->count - regnum)))
break;
- }
- if (regnum + elements >= liveness->count)
- {
liveness->count = max(liveness->count * 2, regnum + elements);
liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs));
- }
- for (i = 0; i < elements; ++i)
liveness->regs[regnum + i].last_read = last_read;
- ret.reg = regnum / 4;
- return ret;
It feels like the for should execute elements * 4 iterations, instead.
It does; "elements *= 4" above. Maybe explicitly multiplying by 4 every time would be clearer?
Eh, I'm blind. Not sure, maybe have a "components = elements * 4;" and use that throughout. I HOPE that's a bit more foolproof...
+}
+static void allocate_variable_temp_register(struct hlsl_ir_var *var, struct liveness_ctx *liveness) +{
- if (!var->reg.allocated && var->last_read)
- {
if (var->data_type->reg_size > 1)
{
var->reg = allocate_temp_range(liveness, var->first_write,
var->last_read, var->data_type->reg_size);
TRACE("Allocated r%u-r%u to %s (liveness %u-%u).\n", var->reg.reg,
var->reg.reg + var->data_type->reg_size - 1, var->name, var->first_write, var->last_read);
}
else
{
var->reg = allocate_temp_register(liveness, var->first_write,
var->last_read, var->data_type->dimx);
TRACE("Allocated r%u%s to %s (liveness %u-%u).\n", var->reg.reg,
debug_writemask(var->reg.writemask), var->name, var->first_write, var->last_read);
}
- }
+}
+static void allocate_temp_registers_recurse(struct list *instrs, struct liveness_ctx *liveness) +{
- struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, instrs, struct hlsl_ir_node, entry)
- {
switch (instr->type)
{
case HLSL_IR_ASSIGNMENT:
{
struct hlsl_ir_assignment *assignment = assignment_from_node(instr);
allocate_variable_temp_register(hlsl_var_from_deref(&assignment->lhs), liveness);
break;
}
case HLSL_IR_IF:
{
struct hlsl_ir_if *iff = if_from_node(instr);
allocate_temp_registers_recurse(iff->then_instrs, liveness);
if (iff->else_instrs)
allocate_temp_registers_recurse(iff->else_instrs, liveness);
break;
}
case HLSL_IR_LOOP:
{
struct hlsl_ir_loop *loop = loop_from_node(instr);
allocate_temp_registers_recurse(loop->body, liveness);
break;
}
default:
break;
}
- }
+}
Do we need to allocate temporary registers for other instructions too, like expressions?
Yes, that's in a separate patch. I was torn between "submit everything related to RA at once so there's enough context" and "keep patch set sizes reviewable" :-/
It's fine: I asked, you replied, I should have all the pieces now. If not, I'll ask again :)
+/* Simple greedy temporary register allocation pass that just assigns a unique
- index to all (simultaneously live) variables or intermediate values. Agnostic
- as to how many registers are actually available for the current backend, and
- does not handle constants. */
+static void allocate_temp_registers(struct hlsl_ir_function_decl *entry_func) +{
- struct liveness_ctx liveness = {};
I'm pretty sure that this is a gcc extension: you want '{0}' instead.
Huh, indeed it is. I'll avoid that in the future.
On 4/1/20 2:35 PM, Matteo Bruni wrote:
On Wed, Apr 1, 2020 at 9:20 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 4/1/20 1:37 PM, Matteo Bruni wrote:
On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura z.figura12@gmail.com wrote:
Hi Zeb,
I have a number of questions / comments, inline.
diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index d6c64edcace..f828e05e335 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -2732,6 +2732,177 @@ static void compute_liveness(struct hlsl_ir_function_decl *entry_func) compute_liveness_recurse(entry_func->body, 0, 0); }
+struct liveness_ctx +{
- size_t count;
- struct
- {
/* 0 if not live yet. */
unsigned int last_read;
- } *regs;
+};
+static unsigned char get_dead_writemask(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int index, unsigned int components)
Maybe get_available_writemask()? Same for the other similarly named functions.
Sure.
+{
- unsigned char i, writemask = 0, count = 0;
- for (i = 0; i < 4; ++i)
- {
if (liveness->regs[index + i].last_read <= first_write)
{
writemask |= 1 << i;
if (++count == components)
return writemask;
}
- }
- return 0;
+}
+static struct hlsl_reg allocate_temp_register(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int last_read, unsigned char components)
How is this going to be different to the non-temp register allocation?
The main difference is with uniforms (and anonymous constants, in sm1), since they're essentially live from program entry.
That said, after rereading my uniform allocation path, I'm not sure why I did write it any differently. The only real reason would seem to be to avoid looping through the entire register array every time, but I didn't even bother making that optimization...
+{
- struct hlsl_reg ret = {.allocated = TRUE};
- unsigned char writemask, i;
- unsigned int regnum;
- for (regnum = 0; regnum < liveness->count; regnum += 4)
- {
if ((writemask = get_dead_writemask(liveness, first_write, regnum, components)))
break;
- }
- if (regnum == liveness->count)
- {
liveness->count = max(liveness->count * 2, 32);
liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs));
Do we want to use array_reserve() here?
Yes, definitely. I wrote this code before adding that, and forgot about it...
writemask = (1 << components) - 1;
- }
- for (i = 0; i < 4; ++i)
- {
if (writemask & (1 << i))
liveness->regs[regnum + i].last_read = last_read;
- }
- ret.reg = regnum / 4;
- ret.writemask = writemask;
- return ret;
+}
+static BOOL is_range_dead(struct liveness_ctx *liveness, unsigned int first_write,
unsigned int index, unsigned int elements)
+{
- unsigned int i;
- for (i = 0; i < elements; i += 4)
- {
if (!get_dead_writemask(liveness, first_write, index + i, 4))
return FALSE;
- }
- return TRUE;
+}
+/* "elements" is the total number of consecutive whole registers needed. */ +static struct hlsl_reg allocate_temp_range(struct liveness_ctx *liveness,
unsigned int first_write, unsigned int last_read, unsigned int elements)
+{
- struct hlsl_reg ret = {.allocated = TRUE};
- unsigned int i, regnum;
- elements *= 4;
- for (regnum = 0; regnum < liveness->count; regnum += 4)
- {
if (is_range_dead(liveness, first_write, regnum, min(elements, liveness->count - regnum)))
break;
- }
- if (regnum + elements >= liveness->count)
- {
liveness->count = max(liveness->count * 2, regnum + elements);
liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs));
- }
- for (i = 0; i < elements; ++i)
liveness->regs[regnum + i].last_read = last_read;
- ret.reg = regnum / 4;
- return ret;
It feels like the for should execute elements * 4 iterations, instead.
It does; "elements *= 4" above. Maybe explicitly multiplying by 4 every time would be clearer?
Eh, I'm blind. Not sure, maybe have a "components = elements * 4;" and use that throughout. I HOPE that's a bit more foolproof...
Sure, that sound like a good solution.
+}
+static void allocate_variable_temp_register(struct hlsl_ir_var *var, struct liveness_ctx *liveness) +{
- if (!var->reg.allocated && var->last_read)
- {
if (var->data_type->reg_size > 1)
{
var->reg = allocate_temp_range(liveness, var->first_write,
var->last_read, var->data_type->reg_size);
TRACE("Allocated r%u-r%u to %s (liveness %u-%u).\n", var->reg.reg,
var->reg.reg + var->data_type->reg_size - 1, var->name, var->first_write, var->last_read);
}
else
{
var->reg = allocate_temp_register(liveness, var->first_write,
var->last_read, var->data_type->dimx);
TRACE("Allocated r%u%s to %s (liveness %u-%u).\n", var->reg.reg,
debug_writemask(var->reg.writemask), var->name, var->first_write, var->last_read);
}
- }
+}
+static void allocate_temp_registers_recurse(struct list *instrs, struct liveness_ctx *liveness) +{
- struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, instrs, struct hlsl_ir_node, entry)
- {
switch (instr->type)
{
case HLSL_IR_ASSIGNMENT:
{
struct hlsl_ir_assignment *assignment = assignment_from_node(instr);
allocate_variable_temp_register(hlsl_var_from_deref(&assignment->lhs), liveness);
break;
}
case HLSL_IR_IF:
{
struct hlsl_ir_if *iff = if_from_node(instr);
allocate_temp_registers_recurse(iff->then_instrs, liveness);
if (iff->else_instrs)
allocate_temp_registers_recurse(iff->else_instrs, liveness);
break;
}
case HLSL_IR_LOOP:
{
struct hlsl_ir_loop *loop = loop_from_node(instr);
allocate_temp_registers_recurse(loop->body, liveness);
break;
}
default:
break;
}
- }
+}
Do we need to allocate temporary registers for other instructions too, like expressions?
Yes, that's in a separate patch. I was torn between "submit everything related to RA at once so there's enough context" and "keep patch set sizes reviewable" :-/
It's fine: I asked, you replied, I should have all the pieces now. If not, I'll ask again :)
+/* Simple greedy temporary register allocation pass that just assigns a unique
- index to all (simultaneously live) variables or intermediate values. Agnostic
- as to how many registers are actually available for the current backend, and
- does not handle constants. */
+static void allocate_temp_registers(struct hlsl_ir_function_decl *entry_func) +{
- struct liveness_ctx liveness = {};
I'm pretty sure that this is a gcc extension: you want '{0}' instead.
Huh, indeed it is. I'll avoid that in the future.