From: Zebediah Figura <zfigura@codeweavers.com>
---
 libs/vkd3d-shader/hlsl_codegen.c | 99 ++++++++++++++++----------------
 1 file changed, 51 insertions(+), 48 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index d63bccdf..38d5f3b7 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -2558,7 +2558,7 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
     compute_liveness_recurse(&entry_func->body, 0, 0);
 }
 
-struct liveness
+struct register_allocator
 {
     size_t size;
     uint32_t reg_count;
@@ -2569,14 +2569,14 @@ struct liveness
     } *regs;
 };
 
-static unsigned int get_available_writemask(struct liveness *liveness,
+static unsigned int get_available_writemask(struct register_allocator *allocator,
         unsigned int first_write, unsigned int component_idx, unsigned int reg_size)
 {
     unsigned int i, writemask = 0, count = 0;
 
     for (i = 0; i < 4; ++i)
     {
-        if (liveness->regs[component_idx + i].last_read <= first_write)
+        if (allocator->regs[component_idx + i].last_read <= first_write)
         {
             writemask |= 1u << i;
             if (++count == reg_size)
@@ -2587,22 +2587,22 @@
     return 0;
 }
 
-static bool resize_liveness(struct hlsl_ctx *ctx, struct liveness *liveness, size_t new_count)
+static bool resize_liveness(struct hlsl_ctx *ctx, struct register_allocator *allocator, size_t new_count)
 {
-    size_t old_capacity = liveness->size;
+    size_t old_capacity = allocator->size;
 
-    if (!hlsl_array_reserve(ctx, (void **)&liveness->regs, &liveness->size, new_count, sizeof(*liveness->regs)))
+    if (!hlsl_array_reserve(ctx, (void **)&allocator->regs, &allocator->size, new_count, sizeof(*allocator->regs)))
         return false;
 
-    if (liveness->size > old_capacity)
-        memset(liveness->regs + old_capacity, 0, (liveness->size - old_capacity) * sizeof(*liveness->regs));
+    if (allocator->size > old_capacity)
+        memset(allocator->regs + old_capacity, 0, (allocator->size - old_capacity) * sizeof(*allocator->regs));
     return true;
 }
 
 /* reg_size is the number of register components to be reserved, while component_count is the number
  * of components for the register's writemask. In SM1, floats and vectors allocate the whole
  * register, even if they don't use it completely. */
-static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness *liveness,
+static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator,
         unsigned int first_write, unsigned int last_read, unsigned int reg_size,
         unsigned int component_count)
 {
@@ -2611,74 +2611,74 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness *
 
     assert(component_count <= reg_size);
 
-    for (component_idx = 0; component_idx < liveness->size; component_idx += 4)
+    for (component_idx = 0; component_idx < allocator->size; component_idx += 4)
     {
-        if ((writemask = get_available_writemask(liveness, first_write, component_idx, reg_size)))
+        if ((writemask = get_available_writemask(allocator, first_write, component_idx, reg_size)))
             break;
     }
-    if (component_idx == liveness->size)
+    if (component_idx == allocator->size)
     {
-        if (!resize_liveness(ctx, liveness, component_idx + 4))
+        if (!resize_liveness(ctx, allocator, component_idx + 4))
             return ret;
         writemask = (1u << reg_size) - 1;
     }
     for (i = 0; i < 4; ++i)
     {
         if (writemask & (1u << i))
-            liveness->regs[component_idx + i].last_read = last_read;
+            allocator->regs[component_idx + i].last_read = last_read;
     }
     ret.id = component_idx / 4;
     ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1);
     ret.allocated = true;
-    liveness->reg_count = max(liveness->reg_count, ret.id + 1);
+    allocator->reg_count = max(allocator->reg_count, ret.id + 1);
     return ret;
 }
 
-static bool is_range_available(struct liveness *liveness, unsigned int first_write,
+static bool is_range_available(struct register_allocator *allocator, unsigned int first_write,
         unsigned int component_idx, unsigned int reg_size)
 {
     unsigned int i;
 
     for (i = 0; i < reg_size; i += 4)
     {
-        if (!get_available_writemask(liveness, first_write, component_idx + i, 4))
+        if (!get_available_writemask(allocator, first_write, component_idx + i, 4))
             return false;
     }
     return true;
 }
 
-static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liveness,
+static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator,
         unsigned int first_write, unsigned int last_read, unsigned int reg_size)
 {
     unsigned int i, component_idx;
     struct hlsl_reg ret = {0};
 
-    for (component_idx = 0; component_idx < liveness->size; component_idx += 4)
+    for (component_idx = 0; component_idx < allocator->size; component_idx += 4)
     {
-        if (is_range_available(liveness, first_write, component_idx,
-                min(reg_size, liveness->size - component_idx)))
+        if (is_range_available(allocator, first_write, component_idx,
+                min(reg_size, allocator->size - component_idx)))
             break;
     }
-    if (!resize_liveness(ctx, liveness, component_idx + reg_size))
+    if (!resize_liveness(ctx, allocator, component_idx + reg_size))
         return ret;
 
     for (i = 0; i < reg_size; ++i)
-        liveness->regs[component_idx + i].last_read = last_read;
+        allocator->regs[component_idx + i].last_read = last_read;
     ret.id = component_idx / 4;
     ret.allocated = true;
-    liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4));
+    allocator->reg_count = max(allocator->reg_count, ret.id + align(reg_size, 4));
     return ret;
 }
 
-static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct liveness *liveness,
+static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct register_allocator *allocator,
         unsigned int first_write, unsigned int last_read, const struct hlsl_type *type)
 {
     unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
 
     if (type->class <= HLSL_CLASS_VECTOR)
-        return allocate_register(ctx, liveness, first_write, last_read, reg_size, type->dimx);
+        return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx);
     else
-        return allocate_range(ctx, liveness, first_write, last_read, reg_size);
+        return allocate_range(ctx, allocator, first_write, last_read, reg_size);
 }
 
 static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type)
@@ -2697,14 +2697,15 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct
     return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask));
 }
 
-static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness)
+static void allocate_variable_temp_register(struct hlsl_ctx *ctx,
+        struct hlsl_ir_var *var, struct register_allocator *allocator)
 {
     if (var->is_input_semantic || var->is_output_semantic || var->is_uniform)
         return;
 
     if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read)
     {
-        var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, liveness,
+        var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator,
                 var->first_write, var->last_read, var->data_type);
 
         TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r',
@@ -2712,7 +2713,8 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir
     }
 }
 
-static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness)
+static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx,
+        struct hlsl_block *block, struct register_allocator *allocator)
 {
     struct hlsl_ir_node *instr;
 
@@ -2720,7 +2722,7 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl
     {
         if (!instr->reg.allocated && instr->last_read)
         {
-            instr->reg = allocate_numeric_registers_for_type(ctx, liveness, instr->index, instr->last_read,
+            instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read,
                     instr->data_type);
             TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index,
                     debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read);
@@ -2731,8 +2733,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl
             case HLSL_IR_IF:
             {
                 struct hlsl_ir_if *iff = hlsl_ir_if(instr);
-                allocate_temp_registers_recurse(ctx, &iff->then_block, liveness);
-                allocate_temp_registers_recurse(ctx, &iff->else_block, liveness);
+                allocate_temp_registers_recurse(ctx, &iff->then_block, allocator);
+                allocate_temp_registers_recurse(ctx, &iff->else_block, allocator);
                 break;
             }
 
@@ -2741,21 +2743,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl
                 struct hlsl_ir_load *load = hlsl_ir_load(instr);
                 /* We need to at least allocate a variable for undefs.
                  * FIXME: We should probably find a way to remove them instead. */
-                allocate_variable_temp_register(ctx, load->src.var, liveness);
+                allocate_variable_temp_register(ctx, load->src.var, allocator);
                 break;
             }
 
             case HLSL_IR_LOOP:
             {
                 struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
-                allocate_temp_registers_recurse(ctx, &loop->body, liveness);
+                allocate_temp_registers_recurse(ctx, &loop->body, allocator);
                 break;
             }
 
             case HLSL_IR_STORE:
             {
                 struct hlsl_ir_store *store = hlsl_ir_store(instr);
-                allocate_variable_temp_register(ctx, store->lhs.var, liveness);
+                allocate_variable_temp_register(ctx, store->lhs.var, allocator);
                 break;
             }
 
@@ -2765,7 +2767,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl
     }
 }
 
-static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness)
+static void allocate_const_registers_recurse(struct hlsl_ctx *ctx,
+        struct hlsl_block *block, struct register_allocator *allocator)
 {
     struct hlsl_constant_defs *defs = &ctx->constant_defs;
     struct hlsl_ir_node *instr;
@@ -2781,7 +2784,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b
                 unsigned int x, y, i, writemask, end_reg;
                 unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
 
-                constant->reg = allocate_numeric_registers_for_type(ctx, liveness, 1, UINT_MAX, type);
+                constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
                 TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
 
                 if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size,
@@ -2846,15 +2849,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b
             case HLSL_IR_IF:
             {
                 struct hlsl_ir_if *iff = hlsl_ir_if(instr);
-                allocate_const_registers_recurse(ctx, &iff->then_block, liveness);
-                allocate_const_registers_recurse(ctx, &iff->else_block, liveness);
+                allocate_const_registers_recurse(ctx, &iff->then_block, allocator);
+                allocate_const_registers_recurse(ctx, &iff->else_block, allocator);
                 break;
             }
 
             case HLSL_IR_LOOP:
            {
                 struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
-                allocate_const_registers_recurse(ctx, &loop->body, liveness);
+                allocate_const_registers_recurse(ctx, &loop->body, allocator);
                 break;
             }
 
@@ -2866,10 +2869,10 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b
 
 static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 {
-    struct liveness liveness = {0};
+    struct register_allocator allocator = {0};
     struct hlsl_ir_var *var;
 
-    allocate_const_registers_recurse(ctx, &entry_func->body, &liveness);
+    allocate_const_registers_recurse(ctx, &entry_func->body, &allocator);
 
     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
     {
@@ -2880,7 +2883,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
             if (reg_size == 0)
                 continue;
 
-            var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &liveness,
+            var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator,
                     1, UINT_MAX, var->data_type);
             TRACE("Allocated %s to %s.\n", var->name,
                     debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
@@ -2894,10 +2897,10 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
  * does not handle constants. */
 static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 {
-    struct liveness liveness = {0};
-    allocate_temp_registers_recurse(ctx, &entry_func->body, &liveness);
-    ctx->temp_count = liveness.reg_count;
-    vkd3d_free(liveness.regs);
+    struct register_allocator allocator = {0};
+    allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
+    ctx->temp_count = allocator.reg_count;
+    vkd3d_free(allocator.regs);
 }
 
 static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)
From: Zebediah Figura <zfigura@codeweavers.com>
---
 libs/vkd3d-shader/hlsl_codegen.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 38d5f3b7..e1eb8b49 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -2889,6 +2889,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
                     debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
         }
     }
+
+    vkd3d_free(allocator.regs);
 }
 
 /* Simple greedy temporary register allocation pass that just assigns a unique
From: Zebediah Figura <zfigura@codeweavers.com>
We will need this in order to allocate some "special" registers: ps_1_* output,
sincos output, etc.
---
 libs/vkd3d-shader/hlsl_codegen.c | 129 +++++++++++++++++--------------
 1 file changed, 71 insertions(+), 58 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index e1eb8b49..bc6e22ca 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -2560,43 +2560,61 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
 
 struct register_allocator
 {
-    size_t size;
-    uint32_t reg_count;
-    struct
+    size_t count, capacity;
+
+    /* Highest register index that has been allocated.
+     * Used to declare sm4 temp count. */
+    uint32_t max_reg;
+
+    struct allocation
     {
-        /* 0 if not live yet. */
-        unsigned int last_read;
-    } *regs;
+        uint32_t reg;
+        unsigned int writemask;
+        unsigned int first_write, last_read;
+    } *allocations;
 };
 
-static unsigned int get_available_writemask(struct register_allocator *allocator,
-        unsigned int first_write, unsigned int component_idx, unsigned int reg_size)
+static unsigned int get_available_writemask(const struct register_allocator *allocator,
+        unsigned int first_write, unsigned int last_read, uint32_t reg_idx)
 {
-    unsigned int i, writemask = 0, count = 0;
+    unsigned int writemask = VKD3DSP_WRITEMASK_ALL;
+    size_t i;
 
-    for (i = 0; i < 4; ++i)
+    for (i = 0; i < allocator->count; ++i)
     {
-        if (allocator->regs[component_idx + i].last_read <= first_write)
-        {
-            writemask |= 1u << i;
-            if (++count == reg_size)
-                return writemask;
-        }
+        const struct allocation *allocation = &allocator->allocations[i];
+
+        /* We do not overlap if first write == last read:
+         * this is the case where we are allocating the result of that
+         * expression, e.g. "add r0, r0, r1". */
+
+        if (allocation->reg == reg_idx
+                && first_write < allocation->last_read && last_read > allocation->first_write)
+            writemask &= ~allocation->writemask;
+
+        if (!writemask)
+            break;
     }
 
-    return 0;
+    return writemask;
 }
 
-static bool resize_liveness(struct hlsl_ctx *ctx, struct register_allocator *allocator, size_t new_count)
+static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator,
+        uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read)
 {
-    size_t old_capacity = allocator->size;
+    struct allocation *allocation;
 
-    if (!hlsl_array_reserve(ctx, (void **)&allocator->regs, &allocator->size, new_count, sizeof(*allocator->regs)))
-        return false;
+    if (!hlsl_array_reserve(ctx, (void **)&allocator->allocations, &allocator->capacity,
+            allocator->count + 1, sizeof(*allocator->allocations)))
+        return;
 
-    if (allocator->size > old_capacity)
-        memset(allocator->regs + old_capacity, 0, (allocator->size - old_capacity) * sizeof(*allocator->regs));
-    return true;
+    allocation = &allocator->allocations[allocator->count++];
+    allocation->reg = reg_idx;
+    allocation->writemask = writemask;
+    allocation->first_write = first_write;
+    allocation->last_read = last_read;
+
+    allocator->max_reg = max(allocator->max_reg, reg_idx);
 }
 
 /* reg_size is the number of register components to be reserved, while component_count is the number
@@ -2606,42 +2624,39 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
         unsigned int first_write, unsigned int last_read, unsigned int reg_size,
         unsigned int component_count)
 {
-    unsigned int component_idx, writemask, i;
     struct hlsl_reg ret = {0};
+    unsigned int writemask;
+    uint32_t reg_idx;
 
     assert(component_count <= reg_size);
 
-    for (component_idx = 0; component_idx < allocator->size; component_idx += 4)
+    for (reg_idx = 0;; ++reg_idx)
     {
-        if ((writemask = get_available_writemask(allocator, first_write, component_idx, reg_size)))
+        writemask = get_available_writemask(allocator, first_write, last_read, reg_idx);
+
+        if (vkd3d_popcount(writemask) >= reg_size)
+        {
+            writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1);
             break;
+        }
     }
-    if (component_idx == allocator->size)
-    {
-        if (!resize_liveness(ctx, allocator, component_idx + 4))
-            return ret;
-        writemask = (1u << reg_size) - 1;
-    }
-    for (i = 0; i < 4; ++i)
-    {
-        if (writemask & (1u << i))
-            allocator->regs[component_idx + i].last_read = last_read;
-    }
-    ret.id = component_idx / 4;
+
+    record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read);
+
+    ret.id = reg_idx;
     ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1);
     ret.allocated = true;
-    allocator->reg_count = max(allocator->reg_count, ret.id + 1);
     return ret;
 }
 
-static bool is_range_available(struct register_allocator *allocator, unsigned int first_write,
-        unsigned int component_idx, unsigned int reg_size)
+static bool is_range_available(const struct register_allocator *allocator,
+        unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size)
 {
-    unsigned int i;
+    uint32_t i;
 
-    for (i = 0; i < reg_size; i += 4)
+    for (i = 0; i < (reg_size / 4); ++i)
     {
-        if (!get_available_writemask(allocator, first_write, component_idx + i, 4))
+        if (get_available_writemask(allocator, first_write, last_read, reg_idx + i) != VKD3DSP_WRITEMASK_ALL)
             return false;
     }
     return true;
@@ -2650,23 +2665,21 @@ static bool is_range_available(struct register_allocator *allocator, unsigned in
 static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator,
         unsigned int first_write, unsigned int last_read, unsigned int reg_size)
 {
-    unsigned int i, component_idx;
     struct hlsl_reg ret = {0};
+    uint32_t reg_idx;
+    unsigned int i;
 
-    for (component_idx = 0; component_idx < allocator->size; component_idx += 4)
+    for (reg_idx = 0;; ++reg_idx)
     {
-        if (is_range_available(allocator, first_write, component_idx,
-                min(reg_size, allocator->size - component_idx)))
+        if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size))
             break;
     }
-    if (!resize_liveness(ctx, allocator, component_idx + reg_size))
-        return ret;
 
-    for (i = 0; i < reg_size; ++i)
-        allocator->regs[component_idx + i].last_read = last_read;
-    ret.id = component_idx / 4;
+    for (i = 0; i < reg_size / 4; ++i)
+        record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read);
+
+    ret.id = reg_idx;
     ret.allocated = true;
-    allocator->reg_count = max(allocator->reg_count, ret.id + align(reg_size, 4));
     return ret;
 }
 
@@ -2890,7 +2903,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
         }
     }
 
-    vkd3d_free(allocator.regs);
+    vkd3d_free(allocator.allocations);
 }
 
 /* Simple greedy temporary register allocation pass that just assigns a unique
@@ -2901,8 +2914,8 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
 {
     struct register_allocator allocator = {0};
     allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
-    ctx->temp_count = allocator.reg_count;
-    vkd3d_free(allocator.regs);
+    ctx->temp_count = allocator.max_reg + 1;
+    vkd3d_free(allocator.allocations);
 }
 
 static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)
From: Zebediah Figura <zfigura@codeweavers.com>
---
 libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++++++++++++
 libs/vkd3d-shader/hlsl_sm1.c     |  7 ++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index bc6e22ca..769e7429 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -2913,6 +2913,24 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
 static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 {
     struct register_allocator allocator = {0};
+
+    /* ps_1_* outputs are special and go in temp register 0. */
+    if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+    {
+        size_t i;
+
+        for (i = 0; i < entry_func->parameters.count; ++i)
+        {
+            const struct hlsl_ir_var *var = entry_func->parameters.vars[i];
+
+            if (var->is_output_semantic)
+            {
+                record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read);
+                break;
+            }
+        }
+    }
+
     allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
     ctx->temp_count = allocator.max_reg + 1;
     vkd3d_free(allocator.allocations);
@@ -2941,6 +2959,10 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
     D3DDECLUSAGE usage;
     uint32_t usage_idx;
 
+    /* ps_1_* outputs are special and go in temp register 0. */
+    if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+        return;
+
     builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, &reg);
     if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx))
     {
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c
index 0afeac68..2df8bc98 100644
--- a/libs/vkd3d-shader/hlsl_sm1.c
+++ b/libs/vkd3d-shader/hlsl_sm1.c
@@ -867,7 +867,12 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *
 
     if (store->lhs.var->is_output_semantic)
     {
-        if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic,
+        if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1)
+        {
+            sm1_instr.dst.type = D3DSPR_TEMP;
+            sm1_instr.dst.reg = 0;
+        }
+        else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic,
                 true, &sm1_instr.dst.type, &sm1_instr.dst.reg))
         {
             assert(reg.allocated);
Matteo Bruni (@Mystral) commented about libs/vkd3d-shader/hlsl_codegen.c:
+    size_t i;
 
-    for (i = 0; i < 4; ++i)
+    for (i = 0; i < allocator->count; ++i)
     {
-        if (allocator->regs[component_idx + i].last_read <= first_write)
-        {
-            writemask |= 1u << i;
-            if (++count == reg_size)
-                return writemask;
-        }
+        const struct allocation *allocation = &allocator->allocations[i];
+
+        /* We do not overlap if first write == last read:
+         * this is the case where we are allocating the result of that
+         * expression, e.g. "add r0, r0, r1". */
I'm confused by this comment: doesn't that example show that you can in fact use the same register as both source and destination?
I haven't fully reviewed this MR yet, but it does seem to be generally fine. Note to the fellow reviewer: the new get_available_writemask() is a pretty different function from the old one (mostly with regard to how it's used); it just happens to cover the same "idea" and thus share the same name.
On Thu Apr 20 19:58:20 2023 +0000, Matteo Bruni wrote:
> I'm confused by this comment: doesn't that example show that you can in fact use the same register as both source and destination?
Right, exactly: the point is that that doesn't count as an overlap, and hence that register isn't in use. Maybe I need to rephrase the comment to be worded less confusingly...
On Thu Apr 20 20:40:35 2023 +0000, Zebediah Figura wrote:
> Right, exactly: the point is that that doesn't count as an overlap, and hence that register isn't in use. Maybe I need to rephrase the comment to be worded less confusingly...
Oh I see, not sure why it confused me as it is. I guess I'd have expected the following if condition to have a "first_write <= allocation->last_read" to match?
On Thu Apr 20 20:58:30 2023 +0000, Matteo Bruni wrote:
> Oh I see, not sure why it confused me as it is. I guess I'd have expected the following if condition to have a "first_write <= allocation->last_read" to match?
In any case, I don't think you need to do anything about it.
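For anyone following the exchange above: the condition treats live ranges as half-open intervals, so it is already symmetric, and no "<=" is needed on either side. A tiny self-contained illustration (the instruction indices are hypothetical, not taken from the MR):

#include <assert.h>
#include <stdbool.h>

/* Live ranges as half-open intervals [first_write, last_read):
 * they conflict only if they properly overlap; merely touching is fine. */
static bool ranges_conflict(unsigned int fw1, unsigned int lr1,
        unsigned int fw2, unsigned int lr2)
{
    return fw1 < lr2 && fw2 < lr1;
}

int main(void)
{
    /* "add r0, r0, r1": the source r0 is last read at instruction 7, and the
     * result is first written at that same instruction 7, so there is no
     * conflict and the result can also live in r0. */
    assert(!ranges_conflict(7, 12, 3, 7));

    /* The test is symmetric, so the mirrored case (the old value starting
     * where the new one ends) is not a conflict either... */
    assert(!ranges_conflict(3, 7, 7, 12));

    /* ...while any proper overlap is. Using "<=" on either side would
     * wrongly flag the touching cases above as conflicts. */
    assert(ranges_conflict(3, 8, 7, 12));
    return 0;
}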