From: Zebediah Figura <zfigura@codeweavers.com>
Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
---
 dlls/d3dcompiler_43/utils.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/dlls/d3dcompiler_43/utils.c b/dlls/d3dcompiler_43/utils.c
index 0c8b918d139..d24341329d3 100644
--- a/dlls/d3dcompiler_43/utils.c
+++ b/dlls/d3dcompiler_43/utils.c
@@ -1904,7 +1904,10 @@ static void debug_dump_instr_list(const struct list *list)
 
 static void debug_dump_src(const struct hlsl_ir_node *node)
 {
-    wine_dbg_printf("%p", node);
+    if (node->index)
+        wine_dbg_printf("@%u", node->index);
+    else
+        wine_dbg_printf("%p", node);
 }
 
 static void debug_dump_ir_var(const struct hlsl_ir_var *var)
@@ -2178,7 +2181,10 @@ static void debug_dump_ir_loop(const struct hlsl_ir_loop *loop)
 
 static void debug_dump_instr(const struct hlsl_ir_node *instr)
 {
-    wine_dbg_printf("%4u: %p: ", instr->index, instr);
+    if (instr->index)
+        wine_dbg_printf("%4u: ", instr->index);
+    else
+        wine_dbg_printf("%p: ", instr);
     switch (instr->type)
     {
         case HLSL_IR_EXPR:
From: Zebediah Figura <zfigura@codeweavers.com>
Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
---
aaa625217, and the approach it implies, works, but on further reflection it's still not very pretty. For example, the following line of HLSL:
var.b.c = 2.0;
produces the following IR:
2: 2.0
3: deref(var)
4: @3.b
5: @4.c = @2
This essentially works, provided that the codegen layer knows how to unwind a deref chain, but it's pretty janky. Node 4 implies we're reading from node 3, and node 3 implies we're reading from var, neither of which is actually happening. The RA pass can't easily be smart enough to recognize that 4 doesn't need to read from 3, which is a problem if we introduce a "maybe uninitialized" warning.
Moreover, if we introduce DCE, we can't DCE out node 3 because of node 4, and while we could DCE out node 4 in terms of removing it from the list, we can't actually destroy the node, since node 5 is using it. Having nodes not in the list is also janky, I feel.
With this patch we instead get the following IR:
2: 2.0
3: deref(var)
4: @3.b
5: @4.c
6: deref(var).b.c = @2
We still get redundant reads, but at least this time we can easily DCE them out. Needing fewer sanity checks in RA is also nice.
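(For illustration, such a DCE pass could be little more than the following sketch. This is not part of the patch; it assumes that a liveness pass has already filled in last_read, and that free_instr() is the per-node destructor.)

static BOOL dce(struct list *instrs)
{
    struct hlsl_ir_node *instr, *next;
    BOOL progress = FALSE;

    /* Meant to be run to a fixed point, recomputing liveness in between,
     * so that a chain of dead derefs goes away one link per iteration. */
    LIST_FOR_EACH_ENTRY_SAFE(instr, next, instrs, struct hlsl_ir_node, entry)
    {
        if (instr->last_read)
            continue;
        switch (instr->type)
        {
            /* Only side-effect-free instructions can be removed. */
            case HLSL_IR_CONSTANT:
            case HLSL_IR_DEREF:
            case HLSL_IR_EXPR:
            case HLSL_IR_SWIZZLE:
                list_remove(&instr->entry);
                free_instr(instr);
                progress = TRUE;
                break;
            default:
                break;
        }
    }
    return progress;
}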
 dlls/d3dcompiler_43/d3dcompiler_private.h | 21 +++++-
 dlls/d3dcompiler_43/hlsl.y                | 28 +++++---
 dlls/d3dcompiler_43/utils.c               | 83 ++++++++++++++++-------
 3 files changed, 97 insertions(+), 35 deletions(-)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h
index 1f1d8e26637..8f81271383b 100644
--- a/dlls/d3dcompiler_43/d3dcompiler_private.h
+++ b/dlls/d3dcompiler_43/d3dcompiler_private.h
@@ -873,10 +873,29 @@ struct hlsl_ir_deref
     struct hlsl_deref src;
 };
 
+struct hlsl_lvalue
+{
+    enum hlsl_ir_deref_type type;
+    union
+    {
+        struct hlsl_ir_var *var;
+        struct
+        {
+            struct hlsl_lvalue *array;
+            struct hlsl_ir_node *index;
+        } array;
+        struct
+        {
+            struct hlsl_lvalue *record;
+            struct hlsl_struct_field *field;
+        } record;
+    } v;
+};
+
 struct hlsl_ir_assignment
 {
     struct hlsl_ir_node node;
-    struct hlsl_deref lhs;
+    struct hlsl_lvalue lhs;
    struct hlsl_ir_node *rhs;
     unsigned char writemask;
 };
diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y
index d3ef18bb6e9..5366181f356 100644
--- a/dlls/d3dcompiler_43/hlsl.y
+++ b/dlls/d3dcompiler_43/hlsl.y
@@ -2642,16 +2642,16 @@ static unsigned int index_instructions(struct list *instrs, unsigned int index)
 }
 
 /* Walk the chain of derefs and retrieve the actual variable we care about. */
-static struct hlsl_ir_var *hlsl_var_from_deref(const struct hlsl_deref *deref)
+static struct hlsl_ir_var *hlsl_var_from_lvalue(const struct hlsl_lvalue *deref)
 {
     switch (deref->type)
     {
         case HLSL_IR_DEREF_VAR:
             return deref->v.var;
         case HLSL_IR_DEREF_ARRAY:
-            return hlsl_var_from_deref(&deref_from_node(deref->v.array.array)->src);
+            return hlsl_var_from_lvalue(deref->v.array.array);
         case HLSL_IR_DEREF_RECORD:
-            return hlsl_var_from_deref(&deref_from_node(deref->v.record.record)->src);
+            return hlsl_var_from_lvalue(deref->v.record.record);
     }
     assert(0);
     return NULL;
@@ -2674,7 +2674,7 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_first
         case HLSL_IR_ASSIGNMENT:
         {
             struct hlsl_ir_assignment *assignment = assignment_from_node(instr);
-            var = hlsl_var_from_deref(&assignment->lhs);
+            var = hlsl_var_from_lvalue(&assignment->lhs);
             if (!var->first_write)
                 var->first_write = loop_first ? min(instr->index, loop_first) : instr->index;
             assignment->rhs->last_read = instr->index;
@@ -2693,10 +2693,22 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_first
         case HLSL_IR_DEREF:
         {
             struct hlsl_ir_deref *deref = deref_from_node(instr);
-            var = hlsl_var_from_deref(&deref->src);
-            var->last_read = loop_last ? max(instr->index, loop_last) : instr->index;
-            if (deref->src.type == HLSL_IR_DEREF_ARRAY)
-                deref->src.v.array.index->last_read = instr->index;
+
+            switch (deref->src.type)
+            {
+                case HLSL_IR_DEREF_VAR:
+                    deref->src.v.var->last_read = loop_last ? max(instr->index, loop_last) : instr->index;
+                    break;
+
+                case HLSL_IR_DEREF_ARRAY:
+                    deref->src.v.array.array->last_read = instr->index;
+                    deref->src.v.array.index->last_read = instr->index;
+                    break;
+
+                case HLSL_IR_DEREF_RECORD:
+                    deref->src.v.record.record->last_read = instr->index;
+                    break;
+            }
             break;
         }
         case HLSL_IR_EXPR:
diff --git a/dlls/d3dcompiler_43/utils.c b/dlls/d3dcompiler_43/utils.c
index d24341329d3..519f50612e8 100644
--- a/dlls/d3dcompiler_43/utils.c
+++ b/dlls/d3dcompiler_43/utils.c
@@ -1478,27 +1478,41 @@ static unsigned int invert_swizzle(unsigned int *swizzle, unsigned int writemask)
     return new_writemask;
 }
 
-static BOOL validate_lhs_deref(const struct hlsl_ir_node *lhs)
+static BOOL get_assignment_lhs(struct hlsl_lvalue *dst, struct hlsl_ir_node *node)
 {
     struct hlsl_ir_deref *deref;
 
-    if (lhs->type != HLSL_IR_DEREF)
+    if (node->type != HLSL_IR_DEREF)
     {
-        hlsl_report_message(lhs->loc, HLSL_LEVEL_ERROR, "invalid lvalue");
+        hlsl_report_message(node->loc, HLSL_LEVEL_ERROR, "invalid lvalue");
         return FALSE;
     }
 
-    deref = deref_from_node(lhs);
+    deref = deref_from_node(node);
+    dst->type = deref->src.type;
 
-    if (deref->src.type == HLSL_IR_DEREF_VAR)
-        return TRUE;
-    if (deref->src.type == HLSL_IR_DEREF_ARRAY)
-        return validate_lhs_deref(deref->src.v.array.array);
-    if (deref->src.type == HLSL_IR_DEREF_RECORD)
-        return validate_lhs_deref(deref->src.v.record.record);
+    switch (deref->src.type)
+    {
+        case HLSL_IR_DEREF_VAR:
+            dst->v.var = deref->src.v.var;
+            return TRUE;
 
-    assert(0);
-    return FALSE;
+        case HLSL_IR_DEREF_ARRAY:
+            dst->v.array.index = deref->src.v.array.index;
+            if (!(dst->v.array.array = d3dcompiler_alloc(sizeof(*dst->v.array.array))))
+                return FALSE;
+            return get_assignment_lhs(dst->v.array.array, deref->src.v.array.array);
+
+        case HLSL_IR_DEREF_RECORD:
+            dst->v.record.field = deref->src.v.record.field;
+            if (!(dst->v.record.record = d3dcompiler_alloc(sizeof(*dst->v.record.record))))
+                return FALSE;
+            return get_assignment_lhs(dst->v.record.record, deref->src.v.record.record);
+
+        default:
+            assert(0);
+            return FALSE;
+    }
 }
@@ -1506,6 +1520,7 @@ struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *lhs, enum parse_assign
 {
     struct hlsl_ir_assignment *assign = d3dcompiler_alloc(sizeof(*assign));
     DWORD writemask = (1 << lhs->data_type->dimx) - 1;
+    struct hlsl_ir_node *lhs_inner;
     struct hlsl_type *type;
 
     if (!assign)
@@ -1516,8 +1531,6 @@ struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *lhs, enum parse_assign
 
     while (lhs->type != HLSL_IR_DEREF)
     {
-        struct hlsl_ir_node *lhs_inner;
-
         if (lhs->type == HLSL_IR_EXPR && expr_from_node(lhs)->op == HLSL_IR_UNOP_CAST)
         {
             FIXME("Cast on the lhs.\n");
@@ -1554,12 +1567,6 @@ struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *lhs, enum parse_assign
         lhs = lhs_inner;
     }
 
-    if (!validate_lhs_deref(lhs))
-    {
-        d3dcompiler_free(assign);
-        return NULL;
-    }
-
     TRACE("Creating proper assignment expression.\n");
     if (writemask == BWRITERSP_WRITEMASK_ALL)
         type = lhs->data_type;
@@ -1591,14 +1598,19 @@ struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *lhs, enum parse_assign
         type_class = lhs->data_type->type;
         type = new_hlsl_type(NULL, type_class, lhs->data_type->base_type, dimx, 1);
     }
+
+    if (!get_assignment_lhs(&assign->lhs, lhs))
+    {
+        d3dcompiler_free(assign);
+        return NULL;
+    }
+
     assign->node.type = HLSL_IR_ASSIGNMENT;
     assign->node.loc = lhs->loc;
     assign->node.data_type = type;
     assign->writemask = writemask;
-
     rhs = implicit_conversion(rhs, type, &rhs->loc);
 
-    assign->lhs = deref_from_node(lhs)->src;
     if (assign_op != ASSIGN_OP_ASSIGN)
     {
         enum hlsl_ir_expr_op op = op_from_assignment(assign_op);
@@ -1619,9 +1631,8 @@ struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *lhs, enum parse_assign
     }
     else
     {
-        list_remove(&lhs->entry);
-        /* Don't recursively free the deref; we just copied its members. */
-        d3dcompiler_free(lhs);
+        /* We could free the LHS derefs here, since we aren't using them
+         * anymore, but it's easier just to leave that to a DCE pass. */
        assign->rhs = rhs;
     }
 
@@ -1919,6 +1930,26 @@ static void debug_dump_ir_var(const struct hlsl_ir_var *var)
         wine_dbg_printf(" : %s", debugstr_a(var->semantic));
 }
 
+static void debug_dump_lvalue(const struct hlsl_lvalue *deref)
+{
+    switch (deref->type)
+    {
+        case HLSL_IR_DEREF_VAR:
+            debug_dump_ir_var(deref->v.var);
+            break;
+        case HLSL_IR_DEREF_ARRAY:
+            debug_dump_lvalue(deref->v.array.array);
+            wine_dbg_printf("[");
+            debug_dump_src(deref->v.array.index);
+            wine_dbg_printf("]");
+            break;
+        case HLSL_IR_DEREF_RECORD:
+            debug_dump_lvalue(deref->v.record.record);
+            wine_dbg_printf(".%s", debugstr_a(deref->v.record.field->name));
+            break;
+    }
+}
+
 static void debug_dump_deref(const struct hlsl_deref *deref)
 {
     switch (deref->type)
@@ -2107,7 +2138,7 @@ static const char *debug_writemask(DWORD writemask)
 static void debug_dump_ir_assignment(const struct hlsl_ir_assignment *assign)
 {
     wine_dbg_printf("= (");
-    debug_dump_deref(&assign->lhs);
+    debug_dump_lvalue(&assign->lhs);
     if (assign->writemask != BWRITERSP_WRITEMASK_ALL)
         wine_dbg_printf("%s", debug_writemask(assign->writemask));
     wine_dbg_printf(" ");
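(Again for illustration rather than as part of the patch: "unwinding" an hlsl_lvalue chain at codegen time could look roughly like the sketch below. It only handles constant array indices, and constant_uint_value(), array_element_size() and field_component_offset() are hypothetical helpers.)

/* Resolve an lvalue chain to its variable plus a constant component offset. */
static struct hlsl_ir_var *resolve_lvalue(const struct hlsl_lvalue *lvalue, unsigned int *offset)
{
    struct hlsl_ir_var *var;

    switch (lvalue->type)
    {
        case HLSL_IR_DEREF_VAR:
            *offset = 0;
            return lvalue->v.var;

        case HLSL_IR_DEREF_ARRAY:
            var = resolve_lvalue(lvalue->v.array.array, offset);
            /* Assumes the index node is an hlsl_ir_constant. */
            *offset += constant_uint_value(lvalue->v.array.index) * array_element_size(lvalue);
            return var;

        case HLSL_IR_DEREF_RECORD:
            var = resolve_lvalue(lvalue->v.record.record, offset);
            *offset += field_component_offset(lvalue->v.record.field);
            return var;
    }
    assert(0);
    return NULL;
}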
On Wed, Apr 15, 2020 at 2:41 AM Zebediah Figura <z.figura12@gmail.com> wrote:
> From: Zebediah Figura <zfigura@codeweavers.com>
>
> Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
> [...]
Hi Zeb,
after reading it again and actually getting into "review mode", I have some more thoughts... Sorry for not thinking of this earlier. See below.
> [...]
>
> -static BOOL validate_lhs_deref(const struct hlsl_ir_node *lhs)
> +static BOOL get_assignment_lhs(struct hlsl_lvalue *dst, struct hlsl_ir_node *node)
>  {
> [...]
>  }
I'm somewhat annoyed by this; it feels like we could directly create lvalue nodes while parsing... Except there doesn't seem to be any simple way. So I'm going to try and keep my initial knee-jerk reaction at bay.
On the other hand, I think it would be nicer to enforce a flat IR structure. Instead of having an arbitrarily complex lvalue inside the assignment instruction, maybe it should have the same structure as a deref, where each "level" is a separate instruction. This is probably why I was thinking of a flag rather than a separate node type: when constructing the assignment you would traverse the lhs derefs and turn them into lvalues. Although having a separate node type and changing that works as well (and it's probably nicer for liveness computation and other passes).
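Concretely, for the example from the commit message I'd picture each level becoming its own instruction, something like (hypothetical dump; nodes 3-5 wouldn't read anything or allocate a register, they'd just name a location):

2: 2.0
3: lvalue(var)
4: @3.b
5: @4.c
6: @5 = @2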
Does that sound reasonable? Am I missing anything?
> +static void debug_dump_lvalue(const struct hlsl_lvalue *deref)
> +{
> [...]
> +}
If we go with my idea, we probably need something to disambiguate lvalues from rvalues / plain derefs.
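E.g., going with the flag variant, it could be as simple as (sketch, not actual code):

struct hlsl_ir_deref
{
    struct hlsl_ir_node node;
    struct hlsl_deref src;
    BOOL is_lvalue; /* only names a store destination, doesn't read src */
};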
On 4/17/20 2:03 AM, Matteo Bruni wrote:
> [...]
>
> I'm somewhat annoyed by this; it feels like we could directly create lvalue nodes while parsing... Except there doesn't seem to be any simple way. So I'm going to try and keep my initial knee-jerk reaction at bay.
>
> On the other hand, I think it would be nicer to enforce a flat IR structure. Instead of having an arbitrarily complex lvalue inside the assignment instruction, maybe it should have the same structure as a deref, where each "level" is a separate instruction. [...]
>
> Does that sound reasonable? Am I missing anything?
Well, note that one problem is we kind of have to (recursively) dup the lvalue anyway, so that complex assignments work.
I think I understand why flattening is desirable in general, but it's not clear to me how that would apply to lvalues (or even to rvalue deref chains, honestly), since at codegen time we essentially resolve arbitrarily complex deref chains into one register (well, except for non-constant array indexing...)
It's also not obvious to me that it's prettier to flatten it, given that on the other hand we're also now inserting nodes into the IR list that would presumably not participate in RA or generate code.
Maybe I'm misunderstanding what you're trying to propose here, though?
+static void debug_dump_lvalue(const struct hlsl_lvalue *deref) +{
- switch (deref->type)
- {
case HLSL_IR_DEREF_VAR:
debug_dump_ir_var(deref->v.var);
break;
case HLSL_IR_DEREF_ARRAY:
debug_dump_lvalue(deref->v.array.array);
wine_dbg_printf("[");
debug_dump_src(deref->v.array.index);
wine_dbg_printf("]");
break;
case HLSL_IR_DEREF_RECORD:
debug_dump_lvalue(deref->v.record.record);
wine_dbg_printf(".%s", debugstr_a(deref->v.record.field->name));
break;
- }
+}
If we go with my idea, we probably need something to disambiguate lvalues from rvalues / plain derefs.
On Fri, Apr 17, 2020 at 6:57 PM Zebediah Figura <zfigura@codeweavers.com> wrote:
> On 4/17/20 2:03 AM, Matteo Bruni wrote:
> > [...]
> >
> > On the other hand, I think it would be nicer to enforce a flat IR structure. Instead of having an arbitrarily complex lvalue inside the assignment instruction, maybe it should have the same structure as a deref, where each "level" is a separate instruction. [...]
> >
> > Does that sound reasonable? Am I missing anything?
>
> Well, note that one problem is we kind of have to (recursively) dup the lvalue anyway, so that complex assignments work.
I see your point. I guess a possibility opened by the flat derefs is that you could share intermediate derefs, although that might well make things more complicated for no benefit.
> I think I understand why flattening is desirable in general, but it's not clear to me how that would apply to lvalues (or even to rvalue deref chains, honestly), since at codegen time we essentially resolve arbitrarily complex deref chains into one register (well, except for non-constant array indexing...)
True, but the same can be said in principle for any other instruction. Also if you always "go flat" you have only one way to follow sources, instead of having to mix tree traversal inside a single instruction with instruction traversal. Relatedly, if we start doing more invasive IR transformations, having simple, "atomic" instructions might make things easier. E.g. with structure splitting, I think you'd end up replacing one instruction (as in one instruction list entry) without changing the rest of the IR, while in the other case you have to replace a piece of a larger instruction which might be buried arbitrarily deep in the tree. Admittedly this is all very abstract and I might not be seeing a sensible overall picture.
> It's also not obvious to me that it's prettier to flatten it, given that on the other hand we're also now inserting nodes into the IR list that would presumably not participate in RA or generate code.
Right, but that's a bit like doing an optimization right while parsing. I guess we want lvalues and rvalues / derefs to either be both flat or both tree-like. Let's focus on rvalues for a moment. Imagine you have a complicated deref chain, eventually picking a single float out of something larger and complex. If you have a very naive compiler you can imagine that each step of the deref is implemented as a copy of whatever the result of the deref step is into a temporary variable. This might mean e.g. temporarily copying a full matrix before getting at the individual component. Incredibly inefficient, but simple and consistent. Obviously this isn't very interesting and can be simplified right away... except when it can't, like the non-constant array indexing case you mention. Or copying a whole input structure to an output variable. For lvalues you can imagine the mirror image, where the naive compiler stores results into a temporary variable and then copies that into the actual destination.
It seems to me that it would be nicer to have a simple and flat IR structure where each entry can do very little, or even nothing, in practice, and handle simplifications and IR restructuring over a number of passes rather than the opposite. Somewhat related, you can already get instructions that eventually do nothing of consequence (e.g. dead code, swizzling into the exact same thing, ...) and I don't think that has to be necessarily that different from the compiler's point of view.
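In code terms I picture a handful of tiny passes run to a fixed point, something like (made-up pass names):

/* Sketch: keep simplifying until nothing changes. */
BOOL progress;
do
{
    progress = fold_constants(instrs);
    progress |= remove_identity_swizzles(instrs);
    progress |= dce(instrs);
} while (progress);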
> Maybe I'm misunderstanding what you're trying to propose here, though?
I don't think you are. I might be missing something more or less obvious to you though.
On 4/17/20 5:43 PM, Matteo Bruni wrote:
> On Fri, Apr 17, 2020 at 6:57 PM Zebediah Figura <zfigura@codeweavers.com> wrote:
> > [...]
> >
> > Well, note that one problem is we kind of have to (recursively) dup the lvalue anyway, so that complex assignments work.
>
> I see your point. I guess a possibility opened by the flat derefs is that you could share intermediate derefs, although that might well make things more complicated for no benefit.
I mean, it could potentially be done. I guess the easiest way that occurs to me is to set a flag that just states whether a HLSL_IR_DEREF is read from...
> > I think I understand why flattening is desirable in general, but it's not clear to me how that would apply to lvalues (or even to rvalue deref chains, honestly), since at codegen time we essentially resolve arbitrarily complex deref chains into one register (well, except for non-constant array indexing...)
>
> True, but the same can be said in principle for any other instruction.
What I meant to say is that an expression like "(a + b) * c" is necessarily two instructions, whereas "a.b.c.<...>.e = f" is always one, regardless of the length of the deref chain.
> Also if you always "go flat" you have only one way to follow sources, instead of having to mix tree traversal inside a single instruction with instruction traversal. Relatedly, if we start doing more invasive IR transformations, having simple, "atomic" instructions might make things easier. E.g. with structure splitting, I think you'd end up replacing one instruction (as in one instruction list entry) without changing the rest of the IR, while in the other case you have to replace a piece of a larger instruction which might be buried arbitrarily deep in the tree. Admittedly this is all very abstract and I might not be seeing a sensible overall picture.
I guess the way I look at it is, emitting an assignment instruction means we have to reach up the entire chain at codegen time anyway, i.e. I'm not sure how you can emit an instruction for each individual lvalue deref node, whereas you can emit (at least) one instruction for each node currently.
To be clear, I don't think it's awful to emit nodes for lvalue derefs, I'm just not convinced it's better—but I'll defer to the maintainer in any case.
> > It's also not obvious to me that it's prettier to flatten it, given that on the other hand we're also now inserting nodes into the IR list that would presumably not participate in RA or generate code.
>
> Right, but that's a bit like doing an optimization right while parsing. I guess we want lvalues and rvalues / derefs to either be both flat or both tree-like. Let's focus on rvalues for a moment. Imagine you have a complicated deref chain, eventually picking a single float out of something larger and complex. If you have a very naive compiler you can imagine that each step of the deref is implemented as a copy of whatever the result of the deref step is into a temporary variable. This might mean e.g. temporarily copying a full matrix before getting at the individual component. Incredibly inefficient, but simple and consistent. Obviously this isn't very interesting and can be simplified right away... except when it can't, like the non-constant array indexing case you mention. Or copying a whole input structure to an output variable. For lvalues you can imagine the mirror image, where the naive compiler stores results into a temporary variable and then copies that into the actual destination.
>
> It seems to me that it would be nicer to have a simple and flat IR structure where each entry can do very little, or even nothing, in practice, and handle simplifications and IR restructuring over a number of passes rather than the opposite. Somewhat related, you can already get instructions that eventually do nothing of consequence (e.g. dead code, swizzling into the exact same thing, ...) and I don't think that has to be necessarily that different from the compiler's point of view.
So I'm not entirely sure from this if you have something specific in mind, but I'll at least run through my thoughts on the matter.
The compiler that exists in at least two of my local trees does emit a copy for every HLSL_IR_DEREF, i.e. from a variable into a temp node. This is awful, inasmuch as it matters, but the bottom line is that, by design, HLSL_IR_DEREF is essentially completely redundant, as you say.
One way to get around this—and the approach implied if we do make each deref an explicit node—is to reach through all the derefs at codegen time and grab the relevant register. We'd skip actually emitting code for any deref node. Hence an expression like "func().b.c + 1" would generate IR like
2: func()
3: @2.b
4: @3.c
5: 1
6: @4 + @5
Inlining aside, we'd only allocate registers for 2, 5, 6, and the source to node 6 would offset the register allocated to node 2 by the offset of "b.c".
Note that liveness analysis would also have to recursively reach through deref nodes.
The advantage of this, I guess, is that if we really do want to make lvalues separate nodes, then rvalues should probably be separate nodes too.
The second alternative that I was looking at was to make this a bit more explicit, and essentially let the hlsl_src structure contain a union of hlsl_ir_node and hlsl_deref (note that hlsl_deref.array/record would then have to be a pointer to hlsl_src instead of the flat structure as patch 0005 has it). Then we'd essentially generate
2: func()
3: 1
4: @2.b.c + @3
I mildly prefer this, since I like having the IR language be explicit about what's allowed and to resemble the code we produce, and it's not difficult to form this even at parse time. We can't easily get rid of HLSL_IR_DEREF entirely here, but we can guarantee it doesn't occur except at parse time (plus maybe a dce run?)
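Roughly, with made-up field names:

/* Sketch: a source operand that is either a plain node reference or a
 * variable deref chain. */
struct hlsl_src
{
    BOOL is_deref;
    union
    {
        struct hlsl_ir_node *node;
        struct hlsl_deref deref; /* whose array/record members would
                                  * point to hlsl_src, as above */
    } u;
};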
A third alternative is to adopt my patches 010* from my 6 April mail, i.e. to make both lvalue and rvalue derefs explicitly chains of derefs, and synthesize variables when necessary. That doesn't preclude folding hlsl_deref into hlsl_src, of course.
On Sun, Apr 19, 2020 at 11:06 PM Zebediah Figura <zfigura@codeweavers.com> wrote:
> On 4/17/20 5:43 PM, Matteo Bruni wrote:
> > [...]
>
> > > I think I understand why flattening is desirable in general, but it's not clear to me how that would apply to lvalues (or even to rvalue deref chains, honestly), since at codegen time we essentially resolve arbitrarily complex deref chains into one register (well, except for non-constant array indexing...)
> >
> > True, but the same can be said in principle for any other instruction.
>
> What I meant to say is that an expression like "(a + b) * c" is necessarily two instructions, whereas "a.b.c.<...>.e = f" is always one, regardless of the length of the deref chain.
Not if that's e.g. a matrix assignment, which needs to become multiple mov instructions at some point. Stretching it a bit, the former can potentially be just one instruction (MAD).
> > Also if you always "go flat" you have only one way to follow sources, instead of having to mix tree traversal inside a single instruction with instruction traversal. [...]
>
> I guess the way I look at it is, emitting an assignment instruction means we have to reach up the entire chain at codegen time anyway, i.e. I'm not sure how you can emit an instruction for each individual lvalue deref node, whereas you can emit (at least) one instruction for each node currently.
My point is that, during compilation, we probably have to traverse the instruction list to look up sources for non-deref nodes anyway. At that point, if derefs are also full instructions / nodes, you get those for free. If, on the other hand, derefs are stored in an arbitrarily deep tree inside other instructions, we need to have a way to traverse derefs in addition to the "non-derefs" instructions. That's just me guessing though.
> To be clear, I don't think it's awful to emit nodes for lvalue derefs, I'm just not convinced it's better—but I'll defer to the maintainer in any case.
Well, sure, but what I'm doing is mostly picturing things in my head, which doesn't always work right.
> > > [...]
> >
> > Right, but that's a bit like doing an optimization right while parsing. [...]
> >
> > It seems to me that it would be nicer to have a simple and flat IR structure where each entry can do very little, or even nothing, in practice, and handle simplifications and IR restructuring over a number of passes rather than the opposite. [...]
> So I'm not entirely sure from this if you have something specific in mind, but I'll at least run through my thoughts on the matter.
>
> The compiler that exists in at least two of my local trees does emit a copy for every HLSL_IR_DEREF, i.e. from a variable into a temp node. This is awful, inasmuch as it matters, but the bottom line is that, by design, HLSL_IR_DEREF is essentially completely redundant, as you say.
>
> One way to get around this—and the approach implied if we do make each deref an explicit node—is to reach through all the derefs at codegen time and grab the relevant register. We'd skip actually emitting code for any deref node. Hence an expression like "func().b.c + 1" would generate IR like
>
> 2: func()
> 3: @2.b
> 4: @3.c
> 5: 1
> 6: @4 + @5
>
> Inlining aside, we'd only allocate registers for 2, 5, 6, and the source to node 6 would offset the register allocated to node 2 by the offset of "b.c".
>
> Note that liveness analysis would also have to recursively reach through deref nodes.
>
> The advantage of this, I guess, is that if we really do want to make lvalues separate nodes, then rvalues should probably be separate nodes too.
Right. One way to see it is that this is essentially a specialized form of copy propagation that only applies to derefs (which is admittedly the most important case, but not the only one). If we have actual copy (or maybe even constant) propagation we become able to 'skip' register allocation for other node types too. I already mentioned swizzling as something we can do without a separate instruction and register, but it's not limited to that.
> The second alternative that I was looking at was to make this a bit more explicit, and essentially let the hlsl_src structure contain a union of hlsl_ir_node and hlsl_deref (note that hlsl_deref.array/record would then have to be a pointer to hlsl_src instead of the flat structure as patch 0005 has it). Then we'd essentially generate
>
> 2: func()
> 3: 1
> 4: @2.b.c + @3
>
> I mildly prefer this, since I like having the IR language be explicit about what's allowed and to resemble the code we produce, and it's not difficult to form this even at parse time. We can't easily get rid of HLSL_IR_DEREF entirely here, but we can guarantee it doesn't occur except at parse time (plus maybe a dce run?)
I guess the downside of this approach is that derefs are a special case to some degree. Which isn't necessarily bad but it feels a little limiting, as in my comment above.
A third alternative is to adopt my patches 010* from my 6 April mail, i.e. to make both lvalue and rvalue derefs explicitly chains of derefs, and synthesize variables when necessary. That doesn't preclude folding hlsl_deref into hlsl_src, of course.
Right. I feel this ends up conflating a few things together, though. Representing lvalues and rvalues as chains of (only) derefs is an IR policy; synthesizing temporary variables is the mechanism; the relation between hlsl_deref and hlsl_src is the "enforcement". Those are all good, but they are separate pieces of the puzzle. E.g. as I mentioned earlier, synthesizing variables is a mechanism that will probably come in handy for one reason or another even if we decide to go with a different route for derefs.
I have another proposal, as a way to sidestep the whole deref story. As usual, with the caveat that this is all armchair planning and it is known that plans rarely survive contact with the enemy. Anyway...
With the current IR we end up having arbitrarily complex deref chains to represent the composition of multiple levels of array and struct "indexing", which are necessarily cumbersome to handle. What if we transform all of that into offsetting instead? An arbitrary variable dereference would be represented as (variable, offset), with offset an arbitrary expression with an integer scalar value. That requires that we know the layout of complex data types early on, which now we can do, I think. The offsets could be in scalar or vec4 units, I can see benefits for either choice. Ideally we would directly generate offsets right during parsing and skip derefs altogether, although that might not be practical. An obvious downside coming from this is that constant folding becomes pretty much required, to compute a register index at codegen time. It does have the advantage that non-constant offsets are no different from constant ones as far as IR is concerned. Obviously we can skip supporting those initially, regardless of all this.
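In other words, something like this; purely hypothetical, just to make the shape of the idea concrete:

/* A dereference collapses to a variable plus a scalar integer offset,
 * where the offset is itself an ordinary IR expression. */
struct hlsl_deref
{
    struct hlsl_ir_var *var;
    struct hlsl_ir_node *offset;
};

/* "s.a[i].b" would then lower to roughly
 *     offset = offsetof(a) + i * elem_size(a) + offsetof(b)
 * built out of plain integer ADD/MUL nodes, so a constant index folds
 * down to a number while a non-constant one needs no special case. */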
What do you think? I have a complication right away, which is the fact that in SM3 struct variables can be allocated into multiple register sets (see http://www.winehq.org/pipermail/wine-devel/2013-August/100705.html). I don't think it's a blocker but it is annoying...
On 4/28/20 6:38 AM, Matteo Bruni wrote:
On Sun, Apr 19, 2020 at 11:06 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 4/17/20 5:43 PM, Matteo Bruni wrote:
I see your point. I guess a possibility opened by the flat derefs is that you could share intermediate derefs, although that might well make things more complicated for no benefit.
I mean, it could potentially be done. I guess the easiest way that occurs to me is to set a flag that just states whether a HLSL_IR_DEREF is read from...
I think I understand why flattening is desirable in general, but it's not clear to me how that would apply to lvalues (or even to rvalue deref chains, honestly), since at codegen time we essentially resolve arbitrarily complex deref chains into one register (well, except for non-constant array indexing...)
True, but the same can be said in principle for any other instruction.
What I meant to say is that an expression like "(a + b) * c" is necessarily two instructions, whereas "a.b.c.<...>.e = f" is always one, regardless of the length of the deref chain.
Not if that's e.g. a matrix assignment, which needs to become multiple mov instructions at some point. Stretching it a bit, the former can potentially be just one instruction (MAD).
Sure, I guess I'm assuming that we've already gone through splitting passes so that's not a concern.
Also, if you always "go flat" you have only one way to follow sources, instead of having to mix tree traversal inside a single instruction with instruction traversal. Relatedly, if we start doing more invasive IR transformations, having simple, "atomic" instructions might make things easier. E.g. with structure splitting, I think you'd end up replacing one instruction (as in one instruction list entry) without changing the rest of the IR, while in the other case you have to replace a piece of a larger instruction which might be buried arbitrarily deep in the tree. Admittedly this is all very abstract and I might not be seeing a sensible overall picture.
I guess the way I look at it is, emitting an assignment instruction means we have to reach up the entire chain at codegen time anyway, i.e. I'm not sure how you can emit an instruction for each individual lvalue deref node, whereas you can emit (at least) one instruction for each node currently.
My point is that, during compilation, we probably have to traverse the instruction list to look up sources for non-deref nodes anyway. At that point, if derefs are also full instructions / nodes, you get those for free. If, on the other hand, derefs are stored in an arbitrarily deep tree inside other instructions, we need a way to traverse derefs in addition to the "non-deref" instructions. That's just me guessing, though.
I don't particularly understand this assertion (despite our conversation in IRC about it). Maybe you could describe a specific optimization that would be helped by this, if you have any in mind?
Fundamentally I can understand that things have to recursively reach through deref nodes (well, unless we collapse them down to a single offset, like your suggestion below), but it's not clear to me that this is necessary for anything but struct or array derefs (and in that case, I don't see a lot of problem with treating them as a special case).
Not that this is hugely important, though, if I pursue the offsetting approach and it works.
To be clear, I don't think it's awful to emit nodes for lvalue derefs, I'm just not convinced it's better—but I'll defer to the maintainer in any case.
Well, sure, but what I'm doing is mostly picturing things in my head, which doesn't always work right.
It's also not obvious to me that it's prettier to flatten it, given that on the other hand we're also now inserting nodes into the IR list that would presumably not participate in RA or generate code.
Right, but that's a bit like doing an optimization right while parsing. I guess we want lvalues and rvalues / derefs to be either both flat or both tree-like. Let's focus on rvalues for a moment. Imagine you have a complicated deref chain, eventually picking a single float out of something larger and complex. In a very naive compiler you can imagine that each step of the deref is implemented as a copy of that step's result into a temporary variable. This might mean e.g. temporarily copying a full matrix before getting at the individual component. Incredibly inefficient, but simple and consistent. Obviously this isn't very interesting and can be simplified right away... except when it can't, like the non-constant array indexing case you mention. Or copying a whole input structure to an output variable. For lvalues you can imagine the mirror image, where the naive compiler stores results into a temporary variable and then copies that into the actual destination.
It seems to me that it would be nicer to have a simple and flat IR structure where each entry can do very little, or even nothing, in practice, and to handle simplifications and IR restructuring over a number of passes, rather than the opposite. Somewhat related, you can already get instructions that eventually do nothing of consequence (e.g. dead code, swizzling into the exact same thing, ...), and I don't think that necessarily has to be that different from the compiler's point of view.
So I'm not entirely sure from this if you have something specific in mind, but I'll at least run through my thoughts on the matter.
The compiler that exists in at least two of my local trees does emit a copy for every HLSL_IR_DEREF, i.e. from a variable into a temp node. This is awful, inasmuch as it matters, but the bottom line is that, by design, HLSL_IR_DEREF is essentially completely redundant, as you say.
One way to get around this—and the approach implied if we do make each deref an explicit node—is to reach through all the derefs at codegen time and grab the relevant register. We'd skip actually emitting code for any deref node. Hence an expression like "func().b.c + 1" would generate IR like
2: func()
3: @2.b
4: @3.c
5: 1
6: @4 + @5
Inlining aside, we'd only allocate registers for 2, 5, 6, and the source to node 6 would offset the register allocated to node 2 by the offset of "b.c".
Note that liveness analysis would also have to recursively reach through deref nodes.
The advantage of this, I guess, is that if we really do want to make lvalues separate nodes, then rvalues should probably be separate nodes too.
Right. One way to see it is that this is essentially a specialized form of copy propagation that only applies to derefs (which is admittedly the most important case, but not the only one). If we have actual copy (or maybe even constant) propagation we become able to 'skip' register allocation for other node types too. I already mentioned swizzling as something we can do without a separate instruction and register, but it's not limited to that.
The second alternative that I was looking at was to make this a bit more explicit, and essentially let the hlsl_src structure contain a union of hlsl_ir_node and hlsl_deref (note that hlsl_deref.array/record would then have to be a pointer to hlsl_src instead of the flat structure as patch 0005 has it). Then we'd essentially generate
2: func()
3: 1
4: @2.b.c + @3
I mildly prefer this, since I like having the IR language be explicit about what's allowed and resemble the code we produce, and it's not difficult to form this even at parse time. We can't easily get rid of HLSL_IR_DEREF entirely here, but we can guarantee it doesn't occur except at parse time (plus maybe a DCE run?).
I guess the downside of this approach is that derefs are a special case to some degree. Which isn't necessarily bad but it feels a little limiting, as in my comment above.
A third alternative is to adopt my patches 010* from my 6 April mail, i.e. to make both lvalue and rvalue derefs explicitly chains of derefs, and synthesize variables when necessary. That doesn't preclude folding hlsl_deref into hlsl_src, of course.
Right. I feel this ends up conflating a few things together, though. Representing lvalues and rvalues as chains of (only) derefs is an IR policy; synthesizing temporary variables is the mechanism; the relation between hlsl_deref and hlsl_src is the "enforcement". Those are all good, but they are separate pieces of the puzzle. E.g. as I mentioned earlier, synthesizing variables is a mechanism that will probably come in handy for one reason or another even if we decide to go with a different route for derefs.
I have another proposal, as a way to sidestep the whole deref story. As usual, with the caveat that this is all armchair planning and it is known that plans rarely survive contact with the enemy. Anyway...
With the current IR we end up having arbitrarily complex deref chains to represent the composition of multiple levels of array and struct "indexing", which are necessarily cumbersome to handle. What if we transform all of that into offsetting instead? An arbitrary variable dereference would be represented as (variable, offset), with offset an arbitrary expression with an integer scalar value. That requires that we know the layout of complex data types early on, which now we can do, I think. The offsets could be in scalar or vec4 units, I can see benefits for either choice. Ideally we would directly generate offsets right during parsing and skip derefs altogether, although that might not be practical. An obvious downside coming from this is that constant folding becomes pretty much required, to compute a register index at codegen time. It does have the advantage that non-constant offsets are no different from constant ones as far as IR is concerned. Obviously we can skip supporting those initially, regardless of all this.
I like this inasmuch as it's basically "hlsl_deref chains, but simpler"; I guess it requires no recursion anywhere, and so kind of sidesteps the constraints that led us here in the first place.
What do you think? I have a complication right away, which is the fact that in SM3 struct variables can be allocated into multiple register sets (see http://www.winehq.org/pipermail/wine-devel/2013-August/100705.html). I don't think it's a blocker but it is annoying...
The way I'd solve that is just to allow for potentially multiple hlsl_reg structures for any given uniform. That could be as simple as including three separate hlsl_reg structures inside hlsl_var. SM2-3 apparently allocates structs by limiting the size to include the last used element (whereas earlier unused elements are still allocated, and simply remain unused). That'd just have to be done per register set.
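E.g. something like this, where the enum and the field names are invented:

/* The three SM2-3 register sets (b#, i#, c#). */
enum hlsl_regset
{
    HLSL_REGSET_BOOL,
    HLSL_REGSET_INT,
    HLSL_REGSET_FLOAT,
    HLSL_REGSET_COUNT,
};

struct hlsl_ir_var
{
    /* ...existing fields... */

    /* One allocation per register set; a set the variable never
     * uses simply stays unallocated. */
    struct hlsl_reg regs[HLSL_REGSET_COUNT];
};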
I'm admittedly not sure how changing our representation of derefs affects this specific problem, but maybe I'm missing something.
To my simultaneous delight and chagrin I can't come up with anything else that this approach would break.
On Tue, Apr 28, 2020 at 10:47 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 4/28/20 6:38 AM, Matteo Bruni wrote:
On Sun, Apr 19, 2020 at 11:06 PM Zebediah Figura zfigura@codeweavers.com wrote:
On 4/17/20 5:43 PM, Matteo Bruni wrote:
I see your point. I guess a possibility opened by the flat derefs is that you could share intermediate derefs, although that might well make things more complicated for no benefit.
I mean, it could potentially be done. I guess the easiest way that occurs to me is to set a flag that just states whether a HLSL_IR_DEREF is read from...
I think I understand why flattening is desirable in general, but it's not clear to me how that would apply to lvalues (or even to rvalue deref chains, honestly), since at codegen time we essentially resolve arbitrarily complex deref chains into one register (well, except for non-constant array indexing...)
True, but the same can be said in principle for any other instruction.
What I meant to say is that an expression like "(a + b) * c" is necessarily two instructions, whereas "a.b.c.<...>.e = f" is always one, regardless of the length of the deref chain.
Not if that's e.g. a matrix assignment, which needs to become multiple mov instructions at some point. Stretching it a bit, the former can potentially be just one instruction (MAD).
Sure, I guess I'm assuming that we've already gone through splitting passes so that's not a concern.
Also, if you always "go flat" you have only one way to follow sources, instead of having to mix tree traversal inside a single instruction with instruction traversal. Relatedly, if we start doing more invasive IR transformations, having simple, "atomic" instructions might make things easier. E.g. with structure splitting, I think you'd end up replacing one instruction (as in one instruction list entry) without changing the rest of the IR, while in the other case you have to replace a piece of a larger instruction which might be buried arbitrarily deep in the tree. Admittedly this is all very abstract and I might not be seeing a sensible overall picture.
I guess the way I look at it is, emitting an assignment instruction means we have to reach up the entire chain at codegen time anyway, i.e. I'm not sure how you can emit an instruction for each individual lvalue deref node, whereas you can emit (at least) one instruction for each node currently.
My point is that, during compilation, we probably have to traverse the instruction list to look up sources for non-deref nodes anyway. At that point, if derefs are also full instructions / nodes, you get those for free. If, on the other hand, derefs are stored in an arbitrarily deep tree inside other instructions, we need a way to traverse derefs in addition to the "non-deref" instructions. That's just me guessing, though.
I don't particularly understand this assertion (despite our conversation in IRC about it). Maybe you could describe a specific optimization that would be helped by this, if you have any in mind?
My thought was that any pass that replaces an instruction might also have to look at the sources. But during our IRC conversation we came up with at least two ways to avoid that, so maybe not. Passes that attempt to combine instructions (like the MAD case I mentioned earlier) might use this kind of traversal. But again, after more thinking it seems like it should also be possible without.
Fundamentally I can understand that things have to recursively reach through deref nodes (well, unless we collapse them down to a single offset, like your suggestion below), but it's not clear to me that this is necessary for anything but struct or array derefs (and in that case, I don't see a lot of problem with treating them as a special case).
Certainly not a lot of problem, but it would be nicer to avoid the recursive search entirely, if possible.
Not that this is hugely important, though, if I pursue the offsetting approach and it works.
To be clear, I don't think it's awful to emit nodes for lvalue derefs, I'm just not convinced it's better—but I'll defer to the maintainer in any case.
Well, sure, but what I'm doing is mostly picturing things in my head, which doesn't always work right.
It's also not obvious to me that it's prettier to flatten it, given that on the other hand we're also now inserting nodes into the IR list that would presumably not participate in RA or generate code.
Right, but that's a bit like doing an optimization right while parsing. I guess we want lvalues and rvalues / derefs to be either both flat or both tree-like. Let's focus on rvalues for a moment. Imagine you have a complicated deref chain, eventually picking a single float out of something larger and complex. In a very naive compiler you can imagine that each step of the deref is implemented as a copy of that step's result into a temporary variable. This might mean e.g. temporarily copying a full matrix before getting at the individual component. Incredibly inefficient, but simple and consistent. Obviously this isn't very interesting and can be simplified right away... except when it can't, like the non-constant array indexing case you mention. Or copying a whole input structure to an output variable. For lvalues you can imagine the mirror image, where the naive compiler stores results into a temporary variable and then copies that into the actual destination.
It seems to me that it would be nicer to have a simple and flat IR structure where each entry can do very little, or even nothing, in practice, and to handle simplifications and IR restructuring over a number of passes, rather than the opposite. Somewhat related, you can already get instructions that eventually do nothing of consequence (e.g. dead code, swizzling into the exact same thing, ...), and I don't think that necessarily has to be that different from the compiler's point of view.
So I'm not entirely sure from this if you have something specific in mind, but I'll at least run through my thoughts on the matter.
The compiler that exists in at least two of my local trees does emit a copy for every HLSL_IR_DEREF, i.e. from a variable into a temp node. This is awful, inasmuch as it matters, but the bottom line is that, by design, HLSL_IR_DEREF is essentially completely redundant, as you say.
One way to get around this—and the approach implied if we do make each deref an explicit node—is to reach through all the derefs at codegen time and grab the relevant register. We'd skip actually emitting code for any deref node. Hence an expression like "func().b.c + 1" would generate IR like
2: func()
3: @2.b
4: @3.c
5: 1
6: @4 + @5
Inlining aside, we'd only allocate registers for 2, 5, 6, and the source to node 6 would offset the register allocated to node 2 by the offset of "b.c".
Note that liveness analysis would also have to recursively reach through deref nodes.
The advantage of this, I guess, is that if we really do want to make lvalues separate nodes, then rvalues should probably be separate nodes too.
Right. One way to see it is that this is essentially a specialized form of copy propagation that only applies to derefs (which is admittedly the most important case, but not the only one). If we have actual copy (or maybe even constant) propagation we become able to 'skip' register allocation for other node types too. I already mentioned swizzling as something we can do without a separate instruction and register, but it's not limited to that.
The second alternative that I was looking at was to make this a bit more explicit, and essentially let the hlsl_src structure contain a union of hlsl_ir_node and hlsl_deref (note that hlsl_deref.array/record would then have to be a pointer to hlsl_src instead of the flat structure as patch 0005 has it). Then we'd essentially generate
2: func()
3: 1
4: @2.b.c + @3
I mildly prefer this, since I like having the IR language be explicit about what's allowed and resemble the code we produce, and it's not difficult to form this even at parse time. We can't easily get rid of HLSL_IR_DEREF entirely here, but we can guarantee it doesn't occur except at parse time (plus maybe a DCE run?).
I guess the downside of this approach is that derefs are a special case to some degree. Which isn't necessarily bad but it feels a little limiting, as in my comment above.
A third alternative is to adopt my patches 010* from my 6 April mail, i.e. to make both lvalue and rvalue derefs explicitly chains of derefs, and synthesize variables when necessary. That doesn't preclude folding hlsl_deref into hlsl_src, of course.
Right. I feel this ends up conflating a few things together, though. Representing lvalues and rvalues as chains of (only) derefs is an IR policy; synthesizing temporary variables is the mechanism; the relation between hlsl_deref and hlsl_src is the "enforcement". Those are all good, but they are separate pieces of the puzzle. E.g. as I mentioned earlier, synthesizing variables is a mechanism that will probably come in handy for one reason or another even if we decide to go with a different route for derefs.
I have another proposal, as a way to sidestep the whole deref story. As usual, with the caveat that this is all armchair planning and it is known that plans rarely survive contact with the enemy. Anyway...
With the current IR we end up having arbitrarily complex deref chains to represent the composition of multiple levels of array and struct "indexing", which are necessarily cumbersome to handle. What if we transform all of that into offsetting instead? An arbitrary variable dereference would be represented as (variable, offset), with offset an arbitrary expression with an integer scalar value. That requires that we know the layout of complex data types early on, which now we can do, I think. The offsets could be in scalar or vec4 units, I can see benefits for either choice. Ideally we would directly generate offsets right during parsing and skip derefs altogether, although that might not be practical. An obvious downside coming from this is that constant folding becomes pretty much required, to compute a register index at codegen time. It does have the advantage that non-constant offsets are no different from constant ones as far as IR is concerned. Obviously we can skip supporting those initially, regardless of all this.
I like this inasmuch as it's basically "hlsl_deref chains, but simpler"; I guess it requires no recursion anywhere, and so kind of sidesteps the constraints that led us here in the first place.
Yeah, I didn't really love any of the options coming from derefs, so I decided to throw them away and see what happens :P
What do you think? I have a complication right away, which is the fact that in SM3 struct variables can be allocated into multiple register sets (see http://www.winehq.org/pipermail/wine-devel/2013-August/100705.html). I don't think it's a blocker but it is annoying...
The way I'd solve that is just to allow for potentially multiple hlsl_reg structures for any given uniform. That could be as simple as including three separate hlsl_reg structures inside hlsl_var. SM2-3 apparently allocates structs by limiting the size to include the last used element (whereas earlier unused elements are still allocated, and simply remain unused). That'd just have to be done per register set.
Hmm, not sure if it does allocate "unnecessary" elements, but if that's the case, that's quite nice indeed, since it means offsets (when counting in units of actual SM2 registers) are the same regardless of the register set.
I'm admittedly not sure how changing our representation of derefs affects this specific problem, but maybe I'm missing something.
It doesn't seem insurmountable either way, just something that's maybe a bit more of a hassle with offsets vs derefs. Or not, if the above is how it works.
To my simultaneous delight and chagrin I can't come up with anything else that this approach would break.
I call it a success! Until you find it while updating the code :D
On 4/28/20 4:22 PM, Matteo Bruni wrote:
I have another proposal, as a way to sidestep the whole deref story. As usual, with the caveat that this is all armchair planning and it is known that plans rarely survive contact with the enemy. Anyway...
With the current IR we end up having arbitrarily complex deref chains to represent the composition of multiple levels of array and struct "indexing", which are necessarily cumbersome to handle. What if we transform all of that into offsetting instead? An arbitrary variable dereference would be represented as (variable, offset), with offset an arbitrary expression with an integer scalar value. That requires that we know the layout of complex data types early on, which now we can do, I think. The offsets could be in scalar or vec4 units, I can see benefits for either choice. Ideally we would directly generate offsets right during parsing and skip derefs altogether, although that might not be practical. An obvious downside coming from this is that constant folding becomes pretty much required, to compute a register index at codegen time. It does have the advantage that non-constant offsets are no different from constant ones as far as IR is concerned. Obviously we can skip supporting those initially, regardless of all this.
I like this inasmuch as it's basically "hlsl_deref chains, but simpler"; I guess it requires no recursion anywhere, and so kind of sidesteps the constraints that led us here in the first place.
Yeah, I didn't really love any of the options coming from derefs, so I decided to throw them away and see what happens :P
What do you think? I have a complication right away, which is the fact that in SM3 struct variables can be allocated into multiple register sets (see http://www.winehq.org/pipermail/wine-devel/2013-August/100705.html). I don't think it's a blocker but it is annoying...
The way I'd solve that is just to allow for potentially multiple hlsl_reg structures for any given uniform. That could be as simple as including three separate hlsl_reg structures inside hlsl_var. SM2-3 apparently allocates structs by limiting the size to include the last used element (whereas earlier unused elements are still allocated, and simply remain unused). That'd just have to be done per register set.
Hmm, not sure if it does allocate "unnecessary" elements, but if that's the case, that's quite nice indeed, since it means offsets (when counting in units of actual SM2 registers) are the same regardless of the register set.
From my testing it essentially does, yes, i.e. if you have
struct
{
    int unused;
    float f;
    bool b;
} s;

float4 main(float4 pos : POSITION) : POSITION
{
    if (s.b)
        return s.f;
    return pos;
}
then "s" gets allocated to registers b0-b2 and c0-c1, but only b2 and c1 are ever used.
So yeah, it makes things pretty simple. I can see how it would have been a lot uglier otherwise.
I'm admittedly not sure how changing our representation of derefs affects this specific problem, but maybe I'm missing something.
It doesn't seem insurmountable either way, just something that's maybe a bit more of a hassle with offsets vs derefs. Or not, if the above is how it works.
To my simultaneous delight and chagrin I can't come up with anything else that this approach would break.
I call it a success! Until you find it while updating the code :D
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura z.figura12@gmail.com --- dlls/d3dcompiler_43/hlsl.y | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index 5366181f356..2d6ed925640 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -2035,7 +2035,7 @@ primary_expr: C_FLOAT YYABORT; } c->node.type = HLSL_IR_CONSTANT; - c->node.loc = get_location(&yylloc); + c->node.loc = get_location(&@1); c->node.data_type = new_hlsl_type(d3dcompiler_strdup("float"), HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1); c->v.value.f[0] = $1; if (!($$ = make_list(&c->node))) @@ -2050,7 +2050,7 @@ primary_expr: C_FLOAT YYABORT; } c->node.type = HLSL_IR_CONSTANT; - c->node.loc = get_location(&yylloc); + c->node.loc = get_location(&@1); c->node.data_type = new_hlsl_type(d3dcompiler_strdup("int"), HLSL_CLASS_SCALAR, HLSL_TYPE_INT, 1, 1); c->v.value.i[0] = $1; if (!($$ = make_list(&c->node))) @@ -2065,7 +2065,7 @@ primary_expr: C_FLOAT YYABORT; } c->node.type = HLSL_IR_CONSTANT; - c->node.loc = get_location(&yylloc); + c->node.loc = get_location(&@1); c->node.data_type = new_hlsl_type(d3dcompiler_strdup("bool"), HLSL_CLASS_SCALAR, HLSL_TYPE_BOOL, 1, 1); c->v.value.b[0] = $1; if (!($$ = make_list(&c->node)))
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
From: Zebediah Figura zfigura@codeweavers.com
Signed-off-by: Zebediah Figura z.figura12@gmail.com --- dlls/d3dcompiler_43/d3dcompiler_private.h | 10 +- dlls/d3dcompiler_43/hlsl.y | 110 +++++++++++++--------- dlls/d3dcompiler_43/utils.c | 57 ++--------- 3 files changed, 80 insertions(+), 97 deletions(-)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index 8f81271383b..1aa232791ba 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -1096,6 +1096,14 @@ static inline struct hlsl_ir_node *node_from_list(struct list *list) return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); }
+static inline void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type type, + struct hlsl_type *data_type, struct source_location loc) +{ + node->type = type; + node->data_type = data_type; + node->loc = loc; +} + BOOL add_declaration(struct hlsl_scope *scope, struct hlsl_ir_var *decl, BOOL local_var) DECLSPEC_HIDDEN; struct hlsl_ir_var *get_variable(struct hlsl_scope *scope, const char *name) DECLSPEC_HIDDEN; void free_declaration(struct hlsl_ir_var *decl) DECLSPEC_HIDDEN; @@ -1115,8 +1123,6 @@ struct hlsl_ir_expr *new_cast(struct hlsl_ir_node *node, struct hlsl_type *type, struct source_location *loc) DECLSPEC_HIDDEN; struct hlsl_ir_node *implicit_conversion(struct hlsl_ir_node *node, struct hlsl_type *type, struct source_location *loc) DECLSPEC_HIDDEN; -struct hlsl_ir_deref *new_var_deref(struct hlsl_ir_var *var) DECLSPEC_HIDDEN; -struct hlsl_ir_deref *new_record_deref(struct hlsl_ir_node *record, struct hlsl_struct_field *field) DECLSPEC_HIDDEN; struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *left, enum parse_assign_op assign_op, struct hlsl_ir_node *right) DECLSPEC_HIDDEN; void push_scope(struct hlsl_parse_ctx *ctx) DECLSPEC_HIDDEN; diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index 2d6ed925640..c0ad6cda9e0 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -277,7 +277,7 @@ static BOOL append_conditional_break(struct list *cond_list) ERR("Out of memory.\n"); return FALSE; } - iff->node.type = HLSL_IR_IF; + init_node(&iff->node, HLSL_IR_IF, NULL, condition->loc); iff->condition = not; list_add_tail(cond_list, &iff->node.entry);
@@ -293,7 +293,7 @@ static BOOL append_conditional_break(struct list *cond_list) ERR("Out of memory.\n"); return FALSE; } - jump->node.type = HLSL_IR_JUMP; + init_node(&jump->node, HLSL_IR_JUMP, NULL, condition->loc); jump->type = HLSL_IR_JUMP_BREAK; list_add_head(iff->then_instrs, &jump->node.entry); return TRUE; @@ -324,8 +324,7 @@ static struct list *create_loop(enum loop_type type, struct list *init, struct l loop = d3dcompiler_alloc(sizeof(*loop)); if (!loop) goto oom; - loop->node.type = HLSL_IR_LOOP; - loop->node.loc = loc; + init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); list_add_tail(list, &loop->node.entry); loop->body = d3dcompiler_alloc(sizeof(*loop->body)); if (!loop->body) @@ -390,9 +389,8 @@ static struct hlsl_ir_swizzle *new_swizzle(DWORD s, unsigned int components,
if (!swizzle) return NULL; - swizzle->node.type = HLSL_IR_SWIZZLE; - swizzle->node.loc = *loc; - swizzle->node.data_type = new_hlsl_type(NULL, HLSL_CLASS_VECTOR, val->data_type->base_type, components, 1); + init_node(&swizzle->node, HLSL_IR_SWIZZLE, + new_hlsl_type(NULL, HLSL_CLASS_VECTOR, val->data_type->base_type, components, 1), *loc); swizzle->val = val; swizzle->swizzle = s; return swizzle; @@ -489,8 +487,7 @@ static struct hlsl_ir_jump *new_return(struct hlsl_ir_node *value, struct source ERR("Out of memory\n"); return NULL; } - jump->node.type = HLSL_IR_JUMP; - jump->node.loc = loc; + init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); jump->type = HLSL_IR_JUMP_RETURN; if (value) { @@ -510,6 +507,38 @@ static struct hlsl_ir_jump *new_return(struct hlsl_ir_node *value, struct source return jump; }
+static struct hlsl_ir_deref *new_var_deref(struct hlsl_ir_var *var, const struct source_location loc) +{ + struct hlsl_ir_deref *deref = d3dcompiler_alloc(sizeof(*deref)); + + if (!deref) + { + ERR("Out of memory.\n"); + return NULL; + } + init_node(&deref->node, HLSL_IR_DEREF, var->data_type, loc); + deref->src.type = HLSL_IR_DEREF_VAR; + deref->src.v.var = var; + return deref; +} + +static struct hlsl_ir_deref *new_record_deref(struct hlsl_ir_node *record, + struct hlsl_struct_field *field, const struct source_location loc) +{ + struct hlsl_ir_deref *deref = d3dcompiler_alloc(sizeof(*deref)); + + if (!deref) + { + ERR("Out of memory.\n"); + return NULL; + } + init_node(&deref->node, HLSL_IR_DEREF, field->type, loc); + deref->src.type = HLSL_IR_DEREF_RECORD; + deref->src.v.record.record = record; + deref->src.v.record.field = field; + return deref; +} + static void struct_var_initializer(struct list *list, struct hlsl_ir_var *var, struct parse_initializer *initializer) { @@ -540,13 +569,12 @@ static void struct_var_initializer(struct list *list, struct hlsl_ir_var *var, } if (components_count_type(field->type) == components_count_type(node->data_type)) { - deref = new_record_deref(&new_var_deref(var)->node, field); + deref = new_record_deref(&new_var_deref(var, var->loc)->node, field, node->loc); if (!deref) { ERR("Out of memory.\n"); break; } - deref->node.loc = node->loc; list_add_tail(list, &deref->node.entry); assignment = make_assignment(&deref->node, ASSIGN_OP_ASSIGN, node); list_add_tail(list, &assignment->entry); @@ -695,7 +723,7 @@ static struct list *declare_vars(struct hlsl_type *basic_type, DWORD modifiers, list_move_tail(statements_list, v->initializer.instrs); d3dcompiler_free(v->initializer.instrs);
- deref = new_var_deref(var); + deref = new_var_deref(var, var->loc); list_add_tail(statements_list, &deref->node.entry); assignment = make_assignment(&deref->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); d3dcompiler_free(v->initializer.args); @@ -1969,8 +1997,7 @@ selection_statement: KW_IF '(' expr ')' if_body ERR("Out of memory\n"); YYABORT; } - instr->node.type = HLSL_IR_IF; - instr->node.loc = get_location(&@1); + init_node(&instr->node, HLSL_IR_IF, NULL, get_location(&@1)); instr->condition = node_from_list($3); instr->then_instrs = $5.then_instrs; instr->else_instrs = $5.else_instrs; @@ -2034,9 +2061,8 @@ primary_expr: C_FLOAT ERR("Out of memory.\n"); YYABORT; } - c->node.type = HLSL_IR_CONSTANT; - c->node.loc = get_location(&@1); - c->node.data_type = new_hlsl_type(d3dcompiler_strdup("float"), HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1); + init_node(&c->node, HLSL_IR_CONSTANT, new_hlsl_type(d3dcompiler_strdup("float"), + HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1), get_location(&@1)); c->v.value.f[0] = $1; if (!($$ = make_list(&c->node))) YYABORT; @@ -2049,9 +2075,8 @@ primary_expr: C_FLOAT ERR("Out of memory.\n"); YYABORT; } - c->node.type = HLSL_IR_CONSTANT; - c->node.loc = get_location(&@1); - c->node.data_type = new_hlsl_type(d3dcompiler_strdup("int"), HLSL_CLASS_SCALAR, HLSL_TYPE_INT, 1, 1); + init_node(&c->node, HLSL_IR_CONSTANT, new_hlsl_type(d3dcompiler_strdup("int"), + HLSL_CLASS_SCALAR, HLSL_TYPE_INT, 1, 1), get_location(&@1)); c->v.value.i[0] = $1; if (!($$ = make_list(&c->node))) YYABORT; @@ -2064,9 +2089,8 @@ primary_expr: C_FLOAT ERR("Out of memory.\n"); YYABORT; } - c->node.type = HLSL_IR_CONSTANT; - c->node.loc = get_location(&@1); - c->node.data_type = new_hlsl_type(d3dcompiler_strdup("bool"), HLSL_CLASS_SCALAR, HLSL_TYPE_BOOL, 1, 1); + init_node(&c->node, HLSL_IR_CONSTANT, new_hlsl_type(d3dcompiler_strdup("bool"), + HLSL_CLASS_SCALAR, HLSL_TYPE_BOOL, 1, 1), get_location(&@1)); c->v.value.b[0] = $1; if (!($$ = make_list(&c->node))) YYABORT; @@ -2083,9 +2107,8 @@ primary_expr: C_FLOAT set_parse_status(&hlsl_ctx.status, PARSE_ERR); YYABORT; } - if ((deref = new_var_deref(var))) + if ((deref = new_var_deref(var, get_location(&@1)))) { - deref->node.loc = get_location(&@1); if (!($$ = make_list(&deref->node))) YYABORT; } @@ -2151,14 +2174,13 @@ postfix_expr: primary_expr { if (!strcmp($3, field->name)) { - struct hlsl_ir_deref *deref = new_record_deref(node, field); + struct hlsl_ir_deref *deref = new_record_deref(node, field, loc);
if (!deref) { ERR("Out of memory\n"); YYABORT; } - deref->node.loc = loc; $$ = append_unop($1, &deref->node); break; } @@ -2195,28 +2217,23 @@ postfix_expr: primary_expr /* This may be an array dereference or a vector/matrix * subcomponent access. * We store it as an array dereference in any case. */ - struct hlsl_ir_deref *deref = d3dcompiler_alloc(sizeof(*deref)); - struct hlsl_type *expr_type = node_from_list($1)->data_type; + const struct hlsl_type *expr_type = node_from_list($1)->data_type; + struct hlsl_ir_deref *deref; + struct hlsl_type *data_type;
TRACE("Array dereference from type %s\n", debug_hlsl_type(expr_type)); - if (!deref) - { - ERR("Out of memory\n"); - YYABORT; - } - deref->node.type = HLSL_IR_DEREF; - deref->node.loc = get_location(&@2); + if (expr_type->type == HLSL_CLASS_ARRAY) { - deref->node.data_type = expr_type->e.array.type; + data_type = expr_type->e.array.type; } else if (expr_type->type == HLSL_CLASS_MATRIX) { - deref->node.data_type = new_hlsl_type(NULL, HLSL_CLASS_VECTOR, expr_type->base_type, expr_type->dimx, 1); + data_type = new_hlsl_type(NULL, HLSL_CLASS_VECTOR, expr_type->base_type, expr_type->dimx, 1); } else if (expr_type->type == HLSL_CLASS_VECTOR) { - deref->node.data_type = new_hlsl_type(NULL, HLSL_CLASS_SCALAR, expr_type->base_type, 1, 1); + data_type = new_hlsl_type(NULL, HLSL_CLASS_SCALAR, expr_type->base_type, 1, 1); } else { @@ -2224,19 +2241,26 @@ postfix_expr: primary_expr hlsl_report_message(get_location(&@2), HLSL_LEVEL_ERROR, "array-indexed expression is scalar"); else hlsl_report_message(get_location(&@2), HLSL_LEVEL_ERROR, "expression is not array-indexable"); - d3dcompiler_free(deref); free_instr_list($1); free_instr_list($3); YYABORT; } + if (node_from_list($3)->data_type->type != HLSL_CLASS_SCALAR) { hlsl_report_message(get_location(&@3), HLSL_LEVEL_ERROR, "array index is not scalar"); - d3dcompiler_free(deref); free_instr_list($1); free_instr_list($3); YYABORT; } + + if (!(deref = d3dcompiler_alloc(sizeof(*deref)))) + { + free_instr_list($1); + free_instr_list($3); + YYABORT; + } + init_node(&deref->node, HLSL_IR_DEREF, data_type, get_location(&@2)); deref->src.type = HLSL_IR_DEREF_ARRAY; deref->src.v.array.array = node_from_list($1); deref->src.v.array.index = node_from_list($3); @@ -2274,9 +2298,7 @@ postfix_expr: primary_expr assert($4.args_count <= ARRAY_SIZE(constructor->args));
constructor = d3dcompiler_alloc(sizeof(*constructor)); - constructor->node.type = HLSL_IR_CONSTRUCTOR; - constructor->node.loc = get_location(&@3); - constructor->node.data_type = $2; + init_node(&constructor->node, HLSL_IR_CONSTRUCTOR, $2, get_location(&@3)); constructor->args_count = $4.args_count; memcpy(constructor->args, $4.args, $4.args_count * sizeof(*$4.args)); d3dcompiler_free($4.args); diff --git a/dlls/d3dcompiler_43/utils.c b/dlls/d3dcompiler_43/utils.c index 519f50612e8..991306d5947 100644 --- a/dlls/d3dcompiler_43/utils.c +++ b/dlls/d3dcompiler_43/utils.c @@ -1325,17 +1325,10 @@ struct hlsl_ir_node *implicit_conversion(struct hlsl_ir_node *node, struct hlsl_ struct hlsl_ir_expr *new_expr(enum hlsl_ir_expr_op op, struct hlsl_ir_node **operands, struct source_location *loc) { - struct hlsl_ir_expr *expr = d3dcompiler_alloc(sizeof(*expr)); + struct hlsl_ir_expr *expr; struct hlsl_type *type; unsigned int i;
- if (!expr) - { - ERR("Out of memory\n"); - return NULL; - } - expr->node.type = HLSL_IR_EXPR; - expr->node.loc = *loc; type = operands[0]->data_type; for (i = 1; i <= 2; ++i) { @@ -1343,10 +1336,7 @@ struct hlsl_ir_expr *new_expr(enum hlsl_ir_expr_op op, struct hlsl_ir_node **ope break; type = expr_common_type(type, operands[i]->data_type, loc); if (!type) - { - d3dcompiler_free(expr); return NULL; - } } for (i = 0; i <= 2; ++i) { @@ -1364,14 +1354,14 @@ struct hlsl_ir_expr *new_expr(enum hlsl_ir_expr_op op, struct hlsl_ir_node **ope }
if (!(cast = new_cast(operands[i], type, &operands[i]->loc))) - { - d3dcompiler_free(expr); return NULL; - } list_add_after(&operands[i]->entry, &cast->node.entry); operands[i] = &cast->node; } - expr->node.data_type = type; + + if (!(expr = d3dcompiler_alloc(sizeof(*expr)))) + return NULL; + init_node(&expr->node, HLSL_IR_EXPR, type, *loc); expr->op = op; expr->operands[0] = operands[0]; expr->operands[1] = operands[1]; @@ -1391,39 +1381,6 @@ struct hlsl_ir_expr *new_cast(struct hlsl_ir_node *node, struct hlsl_type *type, return expr_from_node(cast); }
-struct hlsl_ir_deref *new_var_deref(struct hlsl_ir_var *var) -{ - struct hlsl_ir_deref *deref = d3dcompiler_alloc(sizeof(*deref)); - - if (!deref) - { - ERR("Out of memory.\n"); - return NULL; - } - deref->node.type = HLSL_IR_DEREF; - deref->node.data_type = var->data_type; - deref->src.type = HLSL_IR_DEREF_VAR; - deref->src.v.var = var; - return deref; -} - -struct hlsl_ir_deref *new_record_deref(struct hlsl_ir_node *record, struct hlsl_struct_field *field) -{ - struct hlsl_ir_deref *deref = d3dcompiler_alloc(sizeof(*deref)); - - if (!deref) - { - ERR("Out of memory.\n"); - return NULL; - } - deref->node.type = HLSL_IR_DEREF; - deref->node.data_type = field->type; - deref->src.type = HLSL_IR_DEREF_RECORD; - deref->src.v.record.record = record; - deref->src.v.record.field = field; - return deref; -} - static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) { static const enum hlsl_ir_expr_op ops[] = @@ -1605,9 +1562,7 @@ struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *lhs, enum parse_assign return NULL; }
- assign->node.type = HLSL_IR_ASSIGNMENT; - assign->node.loc = lhs->loc; - assign->node.data_type = type; + init_node(&assign->node, HLSL_IR_ASSIGNMENT, type, lhs->loc); assign->writemask = writemask; rhs = implicit_conversion(rhs, type, &rhs->loc);
From: Zebediah Figura zfigura@codeweavers.com
This will be generally useful, and it's easier to make this change now while relatively few things are touching source pointers.
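As an example of the kind of thing this enables: a later pass can rewrite every use of one node to point at another just by walking the node's "uses" list. The helper below is not part of this patch, just a sketch of how the list would be used.

static void replace_node_uses(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
{
    struct hlsl_src *src, *next;

    LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry)
    {
        hlsl_src_remove(src);
        hlsl_src_from_node(src, new);
    }
}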
Signed-off-by: Zebediah Figura z.figura12@gmail.com --- dlls/d3dcompiler_43/d3dcompiler_private.h | 41 +++++++++++++----- dlls/d3dcompiler_43/hlsl.y | 53 +++++++++++++---------- dlls/d3dcompiler_43/utils.c | 48 ++++++++++---------- 3 files changed, 84 insertions(+), 58 deletions(-)
diff --git a/dlls/d3dcompiler_43/d3dcompiler_private.h b/dlls/d3dcompiler_43/d3dcompiler_private.h index 1aa232791ba..c235dff8e2f 100644 --- a/dlls/d3dcompiler_43/d3dcompiler_private.h +++ b/dlls/d3dcompiler_43/d3dcompiler_private.h @@ -662,6 +662,8 @@ struct hlsl_ir_node enum hlsl_ir_node_type type; struct hlsl_type *data_type;
+ struct list uses; + struct source_location loc;
/* Liveness ranges. "index" is the index of this instruction. Since this is @@ -671,6 +673,12 @@ struct hlsl_ir_node unsigned int index, last_read; };
+struct hlsl_src +{ + struct hlsl_ir_node *node; + struct list entry; +}; + #define HLSL_STORAGE_EXTERN 0x00000001 #define HLSL_STORAGE_NOINTERPOLATION 0x00000002 #define HLSL_MODIFIER_PRECISE 0x00000004 @@ -732,7 +740,7 @@ struct hlsl_ir_function_decl struct hlsl_ir_if { struct hlsl_ir_node node; - struct hlsl_ir_node *condition; + struct hlsl_src condition; struct list *then_instrs; struct list *else_instrs; }; @@ -816,7 +824,7 @@ struct hlsl_ir_expr { struct hlsl_ir_node node; enum hlsl_ir_expr_op op; - struct hlsl_ir_node *operands[3]; + struct hlsl_src operands[3]; };
enum hlsl_ir_jump_type @@ -831,13 +839,13 @@ struct hlsl_ir_jump { struct hlsl_ir_node node; enum hlsl_ir_jump_type type; - struct hlsl_ir_node *return_value; + struct hlsl_src return_value; };
struct hlsl_ir_swizzle { struct hlsl_ir_node node; - struct hlsl_ir_node *val; + struct hlsl_src val; DWORD swizzle; };
@@ -856,12 +864,12 @@ struct hlsl_deref struct hlsl_ir_var *var; struct { - struct hlsl_ir_node *array; - struct hlsl_ir_node *index; + struct hlsl_src array; + struct hlsl_src index; } array; struct { - struct hlsl_ir_node *record; + struct hlsl_src record; struct hlsl_struct_field *field; } record; } v; @@ -882,7 +890,7 @@ struct hlsl_lvalue struct { struct hlsl_lvalue *array; - struct hlsl_ir_node *index; + struct hlsl_src index; } array; struct { @@ -896,7 +904,7 @@ struct hlsl_ir_assignment { struct hlsl_ir_node node; struct hlsl_lvalue lhs; - struct hlsl_ir_node *rhs; + struct hlsl_src rhs; unsigned char writemask; };
@@ -921,7 +929,7 @@ struct hlsl_ir_constant struct hlsl_ir_constructor { struct hlsl_ir_node node; - struct hlsl_ir_node *args[16]; + struct hlsl_src args[16]; unsigned int args_count; };
@@ -1102,6 +1110,19 @@ static inline void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type t node->type = type; node->data_type = data_type; node->loc = loc; + list_init(&node->uses); +} + +static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) +{ + src->node = node; + list_add_tail(&node->uses, &src->entry); +} + +static inline void hlsl_src_remove(struct hlsl_src *src) +{ + src->node = NULL; + list_remove(&src->entry); }
BOOL add_declaration(struct hlsl_scope *scope, struct hlsl_ir_var *decl, BOOL local_var) DECLSPEC_HIDDEN; diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y index c0ad6cda9e0..1b6e31e4964 100644 --- a/dlls/d3dcompiler_43/hlsl.y +++ b/dlls/d3dcompiler_43/hlsl.y @@ -278,7 +278,7 @@ static BOOL append_conditional_break(struct list *cond_list) return FALSE; } init_node(&iff->node, HLSL_IR_IF, NULL, condition->loc); - iff->condition = not; + hlsl_src_from_node(&iff->condition, not); list_add_tail(cond_list, &iff->node.entry);
if (!(iff->then_instrs = d3dcompiler_alloc(sizeof(*iff->then_instrs)))) @@ -391,7 +391,7 @@ static struct hlsl_ir_swizzle *new_swizzle(DWORD s, unsigned int components, return NULL; init_node(&swizzle->node, HLSL_IR_SWIZZLE, new_hlsl_type(NULL, HLSL_CLASS_VECTOR, val->data_type->base_type, components, 1), *loc); - swizzle->val = val; + hlsl_src_from_node(&swizzle->val, val); swizzle->swizzle = s; return swizzle; } @@ -482,6 +482,8 @@ static struct hlsl_ir_jump *new_return(struct hlsl_ir_node *value, struct source { struct hlsl_type *return_type = hlsl_ctx.cur_function->return_type; struct hlsl_ir_jump *jump = d3dcompiler_alloc(sizeof(*jump)); + struct hlsl_ir_node *return_value; + if (!jump) { ERR("Out of memory\n"); @@ -491,11 +493,12 @@ static struct hlsl_ir_jump *new_return(struct hlsl_ir_node *value, struct source jump->type = HLSL_IR_JUMP_RETURN; if (value) { - if (!(jump->return_value = implicit_conversion(value, return_type, &loc))) + if (!(return_value = implicit_conversion(value, return_type, &loc))) { d3dcompiler_free(jump); return NULL; } + hlsl_src_from_node(&jump->return_value, return_value); } else if (return_type->base_type != HLSL_TYPE_VOID) { @@ -534,7 +537,7 @@ static struct hlsl_ir_deref *new_record_deref(struct hlsl_ir_node *record, } init_node(&deref->node, HLSL_IR_DEREF, field->type, loc); deref->src.type = HLSL_IR_DEREF_RECORD; - deref->src.v.record.record = record; + hlsl_src_from_node(&deref->src.v.record.record, record); deref->src.v.record.field = field; return deref; } @@ -1992,16 +1995,18 @@ jump_statement: KW_RETURN expr ';' selection_statement: KW_IF '(' expr ')' if_body { struct hlsl_ir_if *instr = d3dcompiler_alloc(sizeof(*instr)); + struct hlsl_ir_node *condition = node_from_list($3); + if (!instr) { ERR("Out of memory\n"); YYABORT; } init_node(&instr->node, HLSL_IR_IF, NULL, get_location(&@1)); - instr->condition = node_from_list($3); + hlsl_src_from_node(&instr->condition, condition); instr->then_instrs = $5.then_instrs; instr->else_instrs = $5.else_instrs; - if (instr->condition->data_type->dimx > 1 || instr->condition->data_type->dimy > 1) + if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { hlsl_report_message(instr->node.loc, HLSL_LEVEL_ERROR, "if condition requires a scalar"); @@ -2262,9 +2267,8 @@ postfix_expr: primary_expr } init_node(&deref->node, HLSL_IR_DEREF, data_type, get_location(&@2)); deref->src.type = HLSL_IR_DEREF_ARRAY; - deref->src.v.array.array = node_from_list($1); - deref->src.v.array.index = node_from_list($3); - + hlsl_src_from_node(&deref->src.v.array.array, node_from_list($1)); + hlsl_src_from_node(&deref->src.v.array.index, node_from_list($3)); $$ = append_binop($1, $3, &deref->node); } /* "var_modifiers" doesn't make sense in this case, but it's needed @@ -2272,6 +2276,7 @@ postfix_expr: primary_expr | var_modifiers type '(' initializer_expr_list ')' { struct hlsl_ir_constructor *constructor; + unsigned int i;
TRACE("%s constructor.\n", debug_hlsl_type($2)); if ($1) @@ -2300,7 +2305,8 @@ postfix_expr: primary_expr constructor = d3dcompiler_alloc(sizeof(*constructor)); init_node(&constructor->node, HLSL_IR_CONSTRUCTOR, $2, get_location(&@3)); constructor->args_count = $4.args_count; - memcpy(constructor->args, $4.args, $4.args_count * sizeof(*$4.args)); + for (i = 0; i < $4.args_count; ++i) + hlsl_src_from_node(&constructor->args[i], $4.args[i]); d3dcompiler_free($4.args); $$ = append_unop($4.instrs, &constructor->node); } @@ -2699,7 +2705,7 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs var = hlsl_var_from_lvalue(&assignment->lhs); if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; - assignment->rhs->last_read = instr->index; + assignment->rhs.node->last_read = instr->index; break; } case HLSL_IR_CONSTANT: @@ -2709,7 +2715,7 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs struct hlsl_ir_constructor *constructor = constructor_from_node(instr); unsigned int i; for (i = 0; i < constructor->args_count; ++i) - constructor->args[i]->last_read = instr->index; + constructor->args[i].node->last_read = instr->index; break; } case HLSL_IR_DEREF: @@ -2723,12 +2729,12 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs break;
case HLSL_IR_DEREF_ARRAY: - deref->src.v.array.array->last_read = instr->index; - deref->src.v.array.index->last_read = instr->index; + deref->src.v.array.array.node->last_read = instr->index; + deref->src.v.array.index.node->last_read = instr->index; break;
case HLSL_IR_DEREF_RECORD: - deref->src.v.record.record->last_read = instr->index; + deref->src.v.record.record.node->last_read = instr->index; break; } break; @@ -2736,11 +2742,10 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs case HLSL_IR_EXPR: { struct hlsl_ir_expr *expr = expr_from_node(instr); - expr->operands[0]->last_read = instr->index; - if (expr->operands[1]) - expr->operands[1]->last_read = instr->index; - if (expr->operands[2]) - expr->operands[2]->last_read = instr->index; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) + expr->operands[i].node->last_read = instr->index; break; } case HLSL_IR_IF: @@ -2749,14 +2754,14 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs compute_liveness_recurse(iff->then_instrs, loop_first, loop_last); if (iff->else_instrs) compute_liveness_recurse(iff->else_instrs, loop_first, loop_last); - iff->condition->last_read = instr->index; + iff->condition.node->last_read = instr->index; break; } case HLSL_IR_JUMP: { struct hlsl_ir_jump *jump = jump_from_node(instr); - if (jump->type == HLSL_IR_JUMP_RETURN && jump->return_value) - jump->return_value->last_read = instr->index; + if (jump->type == HLSL_IR_JUMP_RETURN && jump->return_value.node) + jump->return_value.node->last_read = instr->index; break; } case HLSL_IR_LOOP: @@ -2769,7 +2774,7 @@ static void compute_liveness_recurse(struct list *instrs, unsigned int loop_firs case HLSL_IR_SWIZZLE: { struct hlsl_ir_swizzle *swizzle = swizzle_from_node(instr); - swizzle->val->last_read = instr->index; + swizzle->val.node->last_read = instr->index; break; } default: diff --git a/dlls/d3dcompiler_43/utils.c b/dlls/d3dcompiler_43/utils.c index 991306d5947..8537e193db4 100644 --- a/dlls/d3dcompiler_43/utils.c +++ b/dlls/d3dcompiler_43/utils.c @@ -1363,9 +1363,8 @@ struct hlsl_ir_expr *new_expr(enum hlsl_ir_expr_op op, struct hlsl_ir_node **ope return NULL; init_node(&expr->node, HLSL_IR_EXPR, type, *loc); expr->op = op; - expr->operands[0] = operands[0]; - expr->operands[1] = operands[1]; - expr->operands[2] = operands[2]; + for (i = 0; i <= 2 && operands[i]; ++i) + hlsl_src_from_node(&expr->operands[i], operands[i]);
return expr; } @@ -1455,16 +1454,16 @@ static BOOL get_assignment_lhs(struct hlsl_lvalue *dst, struct hlsl_ir_node *nod return TRUE;
case HLSL_IR_DEREF_ARRAY: - dst->v.array.index = deref->src.v.array.index; + hlsl_src_from_node(&dst->v.array.index, deref->src.v.array.index.node); if (!(dst->v.array.array = d3dcompiler_alloc(sizeof(*dst->v.array.array)))) return FALSE; - return get_assignment_lhs(dst->v.array.array, deref->src.v.array.array); + return get_assignment_lhs(dst->v.array.array, deref->src.v.array.array.node);
case HLSL_IR_DEREF_RECORD: dst->v.record.field = deref->src.v.record.field; if (!(dst->v.record.record = d3dcompiler_alloc(sizeof(*dst->v.record.field)))) return FALSE; - return get_assignment_lhs(dst->v.record.record, deref->src.v.record.record); + return get_assignment_lhs(dst->v.record.record, deref->src.v.record.record.node);
default: assert(0); @@ -1501,11 +1500,12 @@ struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *lhs, enum parse_assign if (lhs->data_type->type == HLSL_CLASS_MATRIX) FIXME("Assignments with writemasks and matrices on lhs are not supported yet.\n");
- lhs_inner = swizzle->val; + lhs_inner = swizzle->val.node; + hlsl_src_remove(&swizzle->val); list_remove(&lhs->entry);
list_add_after(&rhs->entry, &lhs->entry); - swizzle->val = rhs; + hlsl_src_from_node(&swizzle->val, rhs); if (!(writemask = invert_swizzle(&swizzle->swizzle, writemask))) { hlsl_report_message(lhs->loc, HLSL_LEVEL_ERROR, "invalid writemask"); @@ -1574,21 +1574,21 @@ struct hlsl_ir_node *make_assignment(struct hlsl_ir_node *lhs, enum parse_assign if (assign->lhs.type != HLSL_IR_DEREF_VAR) { FIXME("LHS expression not supported in compound assignments yet.\n"); - assign->rhs = rhs; + hlsl_src_from_node(&assign->rhs, rhs); } else { TRACE("Adding an expression for the compound assignment.\n"); expr = new_binary_expr(op, lhs, rhs, lhs->loc); list_add_after(&rhs->entry, &expr->entry); - assign->rhs = expr; + hlsl_src_from_node(&assign->rhs, expr); } } else { /* We could free the LHS derefs here, since we aren't using them * anymore, but it's easier just to leave that to a DCE pass. */ - assign->rhs = rhs; + hlsl_src_from_node(&assign->rhs, rhs); }
return &assign->node; @@ -1895,7 +1895,7 @@ static void debug_dump_lvalue(const struct hlsl_lvalue *deref) case HLSL_IR_DEREF_ARRAY: debug_dump_lvalue(deref->v.array.array); wine_dbg_printf("["); - debug_dump_src(deref->v.array.index); + debug_dump_src(deref->v.array.index.node); wine_dbg_printf("]"); break; case HLSL_IR_DEREF_RECORD: @@ -1915,13 +1915,13 @@ static void debug_dump_deref(const struct hlsl_deref *deref) wine_dbg_printf(")"); break; case HLSL_IR_DEREF_ARRAY: - debug_dump_src(deref->v.array.array); + debug_dump_src(deref->v.array.array.node); wine_dbg_printf("["); - debug_dump_src(deref->v.array.index); + debug_dump_src(deref->v.array.index.node); wine_dbg_printf("]"); break; case HLSL_IR_DEREF_RECORD: - debug_dump_src(deref->v.record.record); + debug_dump_src(deref->v.record.record.node); wine_dbg_printf(".%s", debugstr_a(deref->v.record.field->name)); break; } @@ -2050,9 +2050,9 @@ static void debug_dump_ir_expr(const struct hlsl_ir_expr *expr) unsigned int i;
wine_dbg_printf("%s (", debug_expr_op(expr)); - for (i = 0; i < 3 && expr->operands[i]; ++i) + for (i = 0; i < 3 && expr->operands[i].node; ++i) { - debug_dump_src(expr->operands[i]); + debug_dump_src(expr->operands[i].node); wine_dbg_printf(" "); } wine_dbg_printf(")"); @@ -2065,7 +2065,7 @@ static void debug_dump_ir_constructor(const struct hlsl_ir_constructor *construc wine_dbg_printf("%s (", debug_hlsl_type(constructor->node.data_type)); for (i = 0; i < constructor->args_count; ++i) { - debug_dump_src(constructor->args[i]); + debug_dump_src(constructor->args[i].node); wine_dbg_printf(" "); } wine_dbg_printf(")"); @@ -2097,7 +2097,7 @@ static void debug_dump_ir_assignment(const struct hlsl_ir_assignment *assign) if (assign->writemask != BWRITERSP_WRITEMASK_ALL) wine_dbg_printf("%s", debug_writemask(assign->writemask)); wine_dbg_printf(" "); - debug_dump_src(assign->rhs); + debug_dump_src(assign->rhs.node); wine_dbg_printf(")"); }
@@ -2105,9 +2105,9 @@ static void debug_dump_ir_swizzle(const struct hlsl_ir_swizzle *swizzle) { unsigned int i;
- debug_dump_src(swizzle->val); + debug_dump_src(swizzle->val.node); wine_dbg_printf("."); - if (swizzle->val->data_type->dimy > 1) + if (swizzle->val.node->data_type->dimy > 1) { for (i = 0; i < swizzle->node.data_type->dimx; ++i) wine_dbg_printf("_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf); @@ -2136,8 +2136,8 @@ static void debug_dump_ir_jump(const struct hlsl_ir_jump *jump) break; case HLSL_IR_JUMP_RETURN: wine_dbg_printf("return "); - if (jump->return_value) - debug_dump_src(jump->return_value); + if (jump->return_value.node) + debug_dump_src(jump->return_value.node); wine_dbg_printf(";"); break; } @@ -2146,7 +2146,7 @@ static void debug_dump_ir_jump(const struct hlsl_ir_jump *jump) static void debug_dump_ir_if(const struct hlsl_ir_if *if_node) { wine_dbg_printf("if ("); - debug_dump_src(if_node->condition); + debug_dump_src(if_node->condition.node); wine_dbg_printf(")\n{\n"); debug_dump_instr_list(if_node->then_instrs); wine_dbg_printf("}\n");