Supersedes !482.
Instead of only storing the value that each variable's component has at the moment of the instruction currently handled by copy-prop, we store the trace of all the historic values with their timestamps, i.e. the instruction index on which the value was stored.
This allow us to query the value that the variable's component had at the time of a swizzle's load instead of the swizzle instead, which also preempts copy-prop from getting stuck in an infinite loop, as which the added tests.
-- v3: vkd3d-shader/hlsl: Use values at the time of the swizzle's load in copy-propagation.
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 74 ++++++++++++++++---------------- 1 file changed, 37 insertions(+), 37 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5a70878bc..3d39fb921 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1203,6 +1203,43 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s return false; }
+/* Allocate a unique, ordered index to each instruction, which will be used for + * computing liveness ranges. + * Index 0 means unused; index 1 means function entry, so start at 2. */ +static unsigned int index_instructions(struct hlsl_block *block, unsigned int index) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + instr->index = index++; + + if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + index = index_instructions(&iff->then_block, index); + index = index_instructions(&iff->else_block, index); + } + else if (instr->type == HLSL_IR_LOOP) + { + index = index_instructions(&hlsl_ir_loop(instr)->body, index); + hlsl_ir_loop(instr)->next_index = index; + } + else if (instr->type == HLSL_IR_SWITCH) + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { + index = index_instructions(&c->body, index); + } + } + } + + return index; +} + /* * Copy propagation. The basic idea is to recognize instruction sequences of the * form: @@ -3242,42 +3279,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) return false; }
-/* Allocate a unique, ordered index to each instruction, which will be used for - * computing liveness ranges. */ -static unsigned int index_instructions(struct hlsl_block *block, unsigned int index) -{ - struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - instr->index = index++; - - if (instr->type == HLSL_IR_IF) - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); - index = index_instructions(&iff->then_block, index); - index = index_instructions(&iff->else_block, index); - } - else if (instr->type == HLSL_IR_LOOP) - { - index = index_instructions(&hlsl_ir_loop(instr)->body, index); - hlsl_ir_loop(instr)->next_index = index; - } - else if (instr->type == HLSL_IR_SWITCH) - { - struct hlsl_ir_switch *s = hlsl_ir_switch(instr); - struct hlsl_ir_switch_case *c; - - LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) - { - index = index_instructions(&c->body, index); - } - } - } - - return index; -} - static void dump_function(struct rb_entry *entry, void *context) { struct hlsl_ir_function *func = RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry); @@ -3520,7 +3521,6 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl struct hlsl_scope *scope; struct hlsl_ir_var *var;
- /* Index 0 means unused; index 1 means function entry, so start at 2. */ index_instructions(&entry_func->body, 2);
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
From: Francisco Casas fcasas@codeweavers.com
Instead of only storing the value that each variable's component has at the moment of the instruction currently handled by copy-prop, we store the trace of all the historic values with their timestamps, i.e. the instruction index on which the value was stored.
This would allow us to query the value that the variable had at the time of execution of previous instructions. --- libs/vkd3d-shader/hlsl_codegen.c | 170 ++++++++++++++++++++----------- 1 file changed, 112 insertions(+), 58 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 3d39fb921..013ec5703 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1204,7 +1204,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s }
/* Allocate a unique, ordered index to each instruction, which will be used for - * computing liveness ranges. + * copy propagation and computing liveness ranges. * Index 0 means unused; index 1 means function entry, so start at 2. */ static unsigned int index_instructions(struct hlsl_block *block, unsigned int index) { @@ -1289,25 +1289,25 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in * cannot easily vectorize the stores @3 and @6. */
-enum copy_propagation_value_state -{ - VALUE_STATE_NOT_WRITTEN = 0, - VALUE_STATE_STATICALLY_WRITTEN, - VALUE_STATE_DYNAMICALLY_WRITTEN, -}; - struct copy_propagation_value { - enum copy_propagation_value_state state; + unsigned int timestamp; + bool statically_written; struct hlsl_ir_node *node; unsigned int component; };
+struct copy_propagation_component_trace +{ + struct copy_propagation_value *records; + size_t record_count, record_capacity; +}; + struct copy_propagation_var_def { struct rb_entry entry; struct hlsl_ir_var *var; - struct copy_propagation_value values[]; + struct copy_propagation_component_trace traces[]; };
struct copy_propagation_state @@ -1327,12 +1327,30 @@ static int copy_propagation_var_def_compare(const void *key, const struct rb_ent static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *context) { struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); + unsigned int component_count = hlsl_type_component_count(var_def->var->data_type); + unsigned int i;
+ for (i = 0; i < component_count; ++i) + vkd3d_free(var_def->traces[i].records); vkd3d_free(var_def); }
+static struct copy_propagation_value *copy_propagation_get_value_at_time( + struct copy_propagation_component_trace *trace, unsigned int time) +{ + int r; + + for (r = trace->record_count - 1; r >= 0; --r) + { + if (trace->records[r].timestamp < time) + return &trace->records[r]; + } + + return NULL; +} + static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state, - const struct hlsl_ir_var *var, unsigned int component) + const struct hlsl_ir_var *var, unsigned int component, unsigned int time) { for (; state; state = state->parent) { @@ -1341,20 +1359,18 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co { struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); unsigned int component_count = hlsl_type_component_count(var->data_type); - enum copy_propagation_value_state state; + struct copy_propagation_value *value;
assert(component < component_count); - state = var_def->values[component].state; + value = copy_propagation_get_value_at_time(&var_def->traces[component], time);
- switch (state) - { - case VALUE_STATE_STATICALLY_WRITTEN: - return &var_def->values[component]; - case VALUE_STATE_DYNAMICALLY_WRITTEN: - return NULL; - case VALUE_STATE_NOT_WRITTEN: - break; - } + if (!value) + continue; + + if (value->statically_written) + return value; + else + return NULL; } }
@@ -1372,7 +1388,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h if (entry) return RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
- if (!(var_def = hlsl_alloc(ctx, offsetof(struct copy_propagation_var_def, values[component_count])))) + if (!(var_def = hlsl_alloc(ctx, offsetof(struct copy_propagation_var_def, traces[component_count])))) return NULL;
var_def->var = var; @@ -1383,8 +1399,8 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h return var_def; }
-static void copy_propagation_invalidate_variable(struct copy_propagation_var_def *var_def, - unsigned int comp, unsigned char writemask) +static void copy_propagation_invalidate_variable(struct hlsl_ctx *ctx, struct copy_propagation_var_def *var_def, + unsigned int comp, unsigned char writemask, unsigned int time) { unsigned i;
@@ -1393,13 +1409,34 @@ static void copy_propagation_invalidate_variable(struct copy_propagation_var_def for (i = 0; i < 4; ++i) { if (writemask & (1u << i)) - var_def->values[comp + i].state = VALUE_STATE_DYNAMICALLY_WRITTEN; + { + struct copy_propagation_component_trace *trace = &var_def->traces[comp + i]; + + /* Don't add an invalidate record if it is already invalidated. */ + if (trace->record_count && trace->records[trace->record_count - 1].timestamp == time) + { + assert(!trace->records[trace->record_count - 1].statically_written); + continue; + } + + assert(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); + + if (!hlsl_array_reserve(ctx, (void **)&trace->records, &trace->record_capacity, + trace->record_count + 1, sizeof(trace->records[0]))) + return; + + trace->records[trace->record_count].timestamp = time; + trace->records[trace->record_count].statically_written = false; + + ++trace->record_count; + } } }
static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ctx *ctx, struct copy_propagation_var_def *var_def, const struct hlsl_deref *deref, - struct hlsl_type *type, unsigned int depth, unsigned int comp_start, unsigned char writemask) + struct hlsl_type *type, unsigned int depth, unsigned int comp_start, unsigned char writemask, + unsigned int time) { unsigned int i, subtype_comp_count; struct hlsl_ir_node *path_node; @@ -1407,7 +1444,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_
if (depth == deref->path_len) { - copy_propagation_invalidate_variable(var_def, comp_start, writemask); + copy_propagation_invalidate_variable(ctx, var_def, comp_start, writemask, time); return; }
@@ -1422,7 +1459,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ comp_start += hlsl_type_component_count(type->e.record.fields[i].type);
copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, - depth + 1, comp_start, writemask); + depth + 1, comp_start, writemask, time); } else { @@ -1431,28 +1468,30 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ if (path_node->type == HLSL_IR_CONSTANT) { copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, - depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, writemask); + depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, + writemask, time); } else { for (i = 0; i < hlsl_type_element_count(type); ++i) { copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, - depth + 1, i * subtype_comp_count, writemask); + depth + 1, i * subtype_comp_count, writemask, time); } } } }
static void copy_propagation_invalidate_variable_from_deref(struct hlsl_ctx *ctx, - struct copy_propagation_var_def *var_def, const struct hlsl_deref *deref, unsigned char writemask) + struct copy_propagation_var_def *var_def, const struct hlsl_deref *deref, + unsigned char writemask, unsigned int time) { copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, deref->var->data_type, - 0, 0, writemask); + 0, 0, writemask, time); }
-static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, unsigned int comp, - unsigned char writemask, struct hlsl_ir_node *instr) +static void copy_propagation_set_value(struct hlsl_ctx *ctx, struct copy_propagation_var_def *var_def, + unsigned int comp, unsigned char writemask, struct hlsl_ir_node *instr, unsigned int time) { unsigned int i, j = 0;
@@ -1460,11 +1499,22 @@ static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, { if (writemask & (1u << i)) { + struct copy_propagation_component_trace *trace = &var_def->traces[comp + i]; + + assert(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); + + if (!hlsl_array_reserve(ctx, (void **)&trace->records, &trace->record_capacity, + trace->record_count + 1, sizeof(trace->records[0]))) + return; + TRACE("Variable %s[%u] is written by instruction %p%s.\n", var_def->var->name, comp + i, instr, debug_hlsl_writemask(1u << i)); - var_def->values[comp + i].state = VALUE_STATE_STATICALLY_WRITTEN; - var_def->values[comp + i].node = instr; - var_def->values[comp + i].component = j++; + trace->records[trace->record_count].timestamp = time; + trace->records[trace->record_count].statically_written = true; + trace->records[trace->record_count].node = instr; + trace->records[trace->record_count].component = j++; + + ++trace->record_count; } } } @@ -1486,7 +1536,8 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, { struct copy_propagation_value *value;
- if (!(value = copy_propagation_get_value(state, var, start + hlsl_swizzle_get_component(swizzle, i)))) + if (!(value = copy_propagation_get_value(state, var, start + hlsl_swizzle_get_component(swizzle, i), + instr->index))) return false;
if (!new_instr) @@ -1537,8 +1588,8 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, { struct copy_propagation_value *value;
- if (!(value = copy_propagation_get_value(state, var, start + hlsl_swizzle_get_component(swizzle, i))) - || value->node->type != HLSL_IR_CONSTANT) + if (!(value = copy_propagation_get_value(state, var, start + hlsl_swizzle_get_component(swizzle, i), + instr->index)) || value->node->type != HLSL_IR_CONSTANT) return false;
values.u[i] = hlsl_ir_constant(value->node)->value.u[value->component]; @@ -1603,7 +1654,7 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, }
static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, - struct hlsl_deref *deref, struct copy_propagation_state *state) + struct hlsl_deref *deref, struct copy_propagation_state *state, unsigned int time) { struct copy_propagation_value *value; struct hlsl_ir_load *load; @@ -1613,7 +1664,7 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, return false; assert(count == 1);
- if (!(value = copy_propagation_get_value(state, deref->var, start))) + if (!(value = copy_propagation_get_value(state, deref->var, start, time))) return false; assert(value->component == 0);
@@ -1651,9 +1702,9 @@ static bool copy_propagation_transform_resource_load(struct hlsl_ctx *ctx, { bool progress = false;
- progress |= copy_propagation_transform_object_load(ctx, &load->resource, state); + progress |= copy_propagation_transform_object_load(ctx, &load->resource, state, load->node.index); if (load->sampler.var) - progress |= copy_propagation_transform_object_load(ctx, &load->sampler, state); + progress |= copy_propagation_transform_object_load(ctx, &load->sampler, state, load->node.index); return progress; }
@@ -1662,7 +1713,7 @@ static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, { bool progress = false;
- progress |= copy_propagation_transform_object_load(ctx, &store->resource, state); + progress |= copy_propagation_transform_object_load(ctx, &store->resource, state, store->node.index); return progress; }
@@ -1683,11 +1734,12 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s
if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) writemask = VKD3DSP_WRITEMASK_0; - copy_propagation_set_value(var_def, start, writemask, store->rhs.node); + copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index); } else { - copy_propagation_invalidate_variable_from_deref(ctx, var_def, lhs, store->writemask); + copy_propagation_invalidate_variable_from_deref(ctx, var_def, lhs, store->writemask, + store->node.index); } }
@@ -1704,7 +1756,7 @@ static void copy_propagation_state_destroy(struct copy_propagation_state *state) }
static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, - struct hlsl_block *block) + struct hlsl_block *block, unsigned int time) { struct hlsl_ir_node *instr;
@@ -1722,7 +1774,7 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct if (!(var_def = copy_propagation_create_var_def(ctx, state, var))) continue;
- copy_propagation_invalidate_variable_from_deref(ctx, var_def, lhs, store->writemask); + copy_propagation_invalidate_variable_from_deref(ctx, var_def, lhs, store->writemask, time);
break; } @@ -1731,8 +1783,8 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); - copy_propagation_invalidate_from_block(ctx, state, &iff->else_block); + copy_propagation_invalidate_from_block(ctx, state, &iff->then_block, time); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_block, time);
break; } @@ -1741,7 +1793,7 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
- copy_propagation_invalidate_from_block(ctx, state, &loop->body); + copy_propagation_invalidate_from_block(ctx, state, &loop->body, time);
break; } @@ -1753,7 +1805,7 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) { - copy_propagation_invalidate_from_block(ctx, state, &c->body); + copy_propagation_invalidate_from_block(ctx, state, &c->body, time); }
break; @@ -1786,8 +1838,8 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if * touched in the two inner states, but this doesn't work for * loops (because we need to know what is invalidated in advance), * so we need copy_propagation_invalidate_from_block() anyway. */ - copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); - copy_propagation_invalidate_from_block(ctx, state, &iff->else_block); + copy_propagation_invalidate_from_block(ctx, state, &iff->then_block, iff->node.index); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_block, iff->node.index);
return progress; } @@ -1798,7 +1850,7 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l struct copy_propagation_state inner_state; bool progress = false;
- copy_propagation_invalidate_from_block(ctx, state, &loop->body); + copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index);
copy_propagation_state_init(ctx, &inner_state, state); progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state); @@ -1823,7 +1875,7 @@ static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) { - copy_propagation_invalidate_from_block(ctx, state, &c->body); + copy_propagation_invalidate_from_block(ctx, state, &c->body, s->node.index); }
return progress; @@ -1884,6 +1936,8 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc struct copy_propagation_state state; bool progress;
+ index_instructions(block, 2); + copy_propagation_state_init(ctx, &state, NULL);
progress = copy_propagation_transform_block(ctx, block, &state);
From: Francisco Casas fcasas@codeweavers.com
This also helps us to avoid replacing a swizzle with itself in copy-prop which can result in infinite loops, as with the included tests this commit.
Consider the following sequence of instructions:
1 : A 2 : B = @1 3 : B 4 : A = @3 5 : @1.x
Current copy-prop would replace 5 so it points to @3 now:
1 : A 2 : B = @1 3 : B 4 : A = @3 5 : @3.x
But in the next iteration it would make it point back to @1, keeping it spinning infinitively.
The solution is to index the instructions and don't replace the swizzle if the new load happens after the current load. --- Makefile.am | 1 + libs/vkd3d-shader/hlsl_codegen.c | 20 ++++++----- tests/hlsl/hard-copy-prop.shader_test | 49 +++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 tests/hlsl/hard-copy-prop.shader_test
diff --git a/Makefile.am b/Makefile.am index 7d5898193..a7bfeec8a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -103,6 +103,7 @@ vkd3d_shader_tests = \ tests/hlsl/gather.shader_test \ tests/hlsl/getdimensions.shader_test \ tests/hlsl/half.shader_test \ + tests/hlsl/hard-copy-prop.shader_test \ tests/hlsl/initializer-flatten.shader_test \ tests/hlsl/initializer-implicit-array.shader_test \ tests/hlsl/initializer-invalid-arg-count.shader_test \ diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 013ec5703..ba7f253eb 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1520,12 +1520,14 @@ static void copy_propagation_set_value(struct hlsl_ctx *ctx, struct copy_propaga }
static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, - const struct copy_propagation_state *state, const struct hlsl_deref *deref, + const struct copy_propagation_state *state, const struct hlsl_ir_load *load, unsigned int swizzle, struct hlsl_ir_node *instr) { const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); + const struct hlsl_deref *deref = &load->src; const struct hlsl_ir_var *var = deref->var; struct hlsl_ir_node *new_instr = NULL; + unsigned int time = load->node.index; unsigned int start, count, i; unsigned int ret_swizzle = 0;
@@ -1537,7 +1539,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, struct copy_propagation_value *value;
if (!(value = copy_propagation_get_value(state, var, start + hlsl_swizzle_get_component(swizzle, i), - instr->index))) + time))) return false;
if (!new_instr) @@ -1572,12 +1574,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, }
static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, - const struct copy_propagation_state *state, const struct hlsl_deref *deref, + const struct copy_propagation_state *state, const struct hlsl_ir_load *load, unsigned int swizzle, struct hlsl_ir_node *instr) { const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); + const struct hlsl_deref *deref = &load->src; const struct hlsl_ir_var *var = deref->var; struct hlsl_constant_value values = {0}; + unsigned int time = load->node.index; unsigned int start, count, i; struct hlsl_ir_node *cons;
@@ -1589,7 +1593,7 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, struct copy_propagation_value *value;
if (!(value = copy_propagation_get_value(state, var, start + hlsl_swizzle_get_component(swizzle, i), - instr->index)) || value->node->type != HLSL_IR_CONSTANT) + time)) || value->node->type != HLSL_IR_CONSTANT) return false;
values.u[i] = hlsl_ir_constant(value->node)->value.u[value->component]; @@ -1626,10 +1630,10 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, return false; }
- if (copy_propagation_replace_with_constant_vector(ctx, state, &load->src, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) + if (copy_propagation_replace_with_constant_vector(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) return true;
- if (copy_propagation_replace_with_single_instr(ctx, state, &load->src, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) + if (copy_propagation_replace_with_single_instr(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) return true;
return false; @@ -1644,10 +1648,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, return false; load = hlsl_ir_load(swizzle->val.node);
- if (copy_propagation_replace_with_constant_vector(ctx, state, &load->src, swizzle->swizzle, &swizzle->node)) + if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->swizzle, &swizzle->node)) return true;
- if (copy_propagation_replace_with_single_instr(ctx, state, &load->src, swizzle->swizzle, &swizzle->node)) + if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->swizzle, &swizzle->node)) return true;
return false; diff --git a/tests/hlsl/hard-copy-prop.shader_test b/tests/hlsl/hard-copy-prop.shader_test new file mode 100644 index 000000000..5b8604b82 --- /dev/null +++ b/tests/hlsl/hard-copy-prop.shader_test @@ -0,0 +1,49 @@ +[pixel shader] +float cond; + +float4 main() : sv_target +{ + float2 r = {1, 2}; + float2 tmp; + + // invalidate r + if (cond) + r = float2(10, 20); + + tmp = r; + r = tmp; + return r.y; +} + +[test] +uniform 0 float 0.0 +draw quad +probe all rgba (2.0, 2.0, 2.0, 2.0) +uniform 0 float 1.0 +draw quad +probe all rgba (20.0, 20.0, 20.0, 20.0) + + +[pixel shader] +float cond; + +float4 main() : sv_target +{ + float2 r = {3, 4}; + + // invalidate r + if (cond) + r = float2(30, 40); + + r = r; + r = float2(1, r.y); + + return float4(r, 0, 0); +} +[test] +uniform 0 float 0.0 +draw quad +probe all rgba (1.0, 4.0, 0.0, 0.0) +uniform 0 float 1.0 +draw quad +probe all rgba (1.0, 40.0, 0.0, 0.0)
On Wed Nov 22 19:51:29 2023 +0000, Francisco Casas wrote:
changed this line in [version 3 of the diff](/wine/vkd3d/-/merge_requests/487/diffs?diff_id=85242&start_sha=802889fab3a8f2e878b2c4964dd1a41175f556df#3cf804f245af47d51595ff932bf817c50967eea2_1630_1633)
Sorry, I had forgot to push these changes in the last version, I added them now.
I can totally change it if you insist, I just don't see the disadvantage of it being there.
Clarity, mostly. It looks like an optimization at first glance, but if it were an optimization I'd expect to see
``` if (trace->record_count && trace->records[trace->record_count - 1].timestamp <= time && !trace->records[trace->record_count - 1].statically_written) continue; ```
On Wed Nov 22 21:22:09 2023 +0000, Zebediah Figura wrote:
I can totally change it if you insist, I just don't see the
disadvantage of it being there. Clarity, mostly. It looks like an optimization at first glance, but if it were an optimization I'd expect to see
if (trace->record_count && trace->records[trace->record_count - 1].timestamp <= time && !trace->records[trace->record_count - 1].statically_written) continue;
I see, that is a better optimization. The disadvantage is that it is at the expense of making the asserts in the following calls to copy_propagation_set_value() and copy_propagation_invalidate_variable() less strong, because the time of the last record may remain smaller that the requested time of the invalidation.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
- cannot easily vectorize the stores @3 and @6.
*/
-enum copy_propagation_value_state -{
- VALUE_STATE_NOT_WRITTEN = 0,
- VALUE_STATE_STATICALLY_WRITTEN,
- VALUE_STATE_DYNAMICALLY_WRITTEN,
-};
struct copy_propagation_value {
- enum copy_propagation_value_state state;
- unsigned int timestamp;
- bool statically_written;
It seems as if `statically_written` could be assumed to be `!!node`, so you have one fewer consistency rule to enforce.
In 3/3 the commit might mention that you're not just fixing an infinite loop (totally worth of fixing anyway, of course), but also a genuine miscompilation, as noticed by Zeb in https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/482#note_52784.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
- for (i = 0; i < component_count; ++i)
vkd3d_free(var_def);vkd3d_free(var_def->traces[i].records);
}
+static struct copy_propagation_value *copy_propagation_get_value_at_time(
struct copy_propagation_component_trace *trace, unsigned int time)
+{
- int r;
- for (r = trace->record_count - 1; r >= 0; --r)
- {
if (trace->records[r].timestamp < time)
return &trace->records[r];
- }
If I am not mistaken timestamps are monotonic here. I wonder whether a binary search might be more efficient, at least when looking for the load referenced by a swizzle (which might be much earlier in the code).
Just thinking loudly, though, I think the code as is is fine until we find evidence of the contrary. It might even be that the binary search is detrimental, if loads referenced by swizzles end up being close to the swizzles themselves. It depends on how much history we have to "unwind" on average.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
if (trace->record_count && trace->records[trace->record_count - 1].timestamp == time)
{
assert(!trace->records[trace->record_count - 1].statically_written);
continue;
}
assert(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time);
if (!hlsl_array_reserve(ctx, (void **)&trace->records, &trace->record_capacity,
trace->record_count + 1, sizeof(trace->records[0])))
return;
trace->records[trace->record_count].timestamp = time;
trace->records[trace->record_count].statically_written = false;
++trace->record_count;
It looks like this snippet could be refactored to a helper, which is also used below. Mostly for clarity.
On Wed Nov 22 23:30:15 2023 +0000, Francisco Casas wrote:
I see, that is a better optimization. The disadvantage is that it is at the expense of making the asserts in the following calls to copy_propagation_set_value() and copy_propagation_invalidate_variable() less strong, because the time of the last record may remain smaller that the requested time of the invalidation.
More in general it seems that you can avoid pushing a new record each time it matches the last one, be it statically written or not. It's probably not an essential optimization, but it's an easy one. I'd rather have it, but I can live without it, as you two prefer.