From: Conor McCarthy <cmccarthy@codeweavers.com>
---
 libs/vkd3d-shader/ir.c | 180 +++++++++++++++++++++++++++++++++--------
 1 file changed, 145 insertions(+), 35 deletions(-)

diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c
index b7348b4f1..e94a8dfea 100644
--- a/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d-shader/ir.c
@@ -2794,35 +2794,6 @@ fail:
     return VKD3D_ERROR_OUT_OF_MEMORY;
 }
 
-static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program)
-{
-    struct ssa_allocation ssa = {0};
-    size_t i;
-
-    if (!(ssa.table = vkd3d_calloc(program->ssa_count, sizeof(*ssa.table))))
-        return VKD3D_ERROR_OUT_OF_MEMORY;
-    memset(ssa.table, 0xff, program->ssa_count * sizeof(*ssa.table));
-
-    for (i = 0; i < program->instructions.count; ++i)
-    {
-        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
-        size_t j;
-
-        for (j = 0; j < ins->dst_count; ++j)
-            materialize_ssas_to_temps_process_dst_param(program, &ssa, &ins->dst[j]);
-
-        for (j = 0; j < ins->src_count; ++j)
-            materialize_ssas_to_temps_process_src_param(program, &ssa, &ins->src[j]);
-    }
-
-    program->temp_count += ssa.next_temp_idx;
-    program->ssa_count = 0;
-
-    vkd3d_free(ssa.table);
-
-    return VKD3D_OK;
-}
-
 struct vsir_block_list
 {
     struct vsir_block **blocks;
@@ -4291,6 +4262,134 @@ fail:
     return ret;
 }
 
+/* Record the undominated SSA uses made by "reg", which is read by an instruction in "block".
+ *
+ * If "reg" is an SSA register and the block stored for its id in "origin_blocks" does not
+ * dominate "block", the matching entry of ssa->table is set to UINT_MAX. Registers used as
+ * relative addresses by "reg" are then checked recursively. This is done whether or not
+ * "reg" itself is an SSA register, so that SSA values which only address a non-SSA register
+ * are not missed.
+ *
+ * Returns the number of undominated uses found; a value used several times is counted
+ * once per use. */
+static size_t register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssa_allocation *ssa,
+        struct vsir_block *block, struct vsir_block **origin_blocks)
+{
+    unsigned int ssa_id, i;
+    size_t count = 0;
+
+    if (register_is_ssa(reg))
+    {
+        ssa_id = reg->idx[0].offset;
+        if (!vsir_block_dominates(origin_blocks[ssa_id], block))
+        {
+            ssa->table[ssa_id] = UINT_MAX;
+            ++count;
+        }
+    }
+
+    for (i = 0; i < reg->idx_count; ++i)
+    {
+        if (reg->idx[i].rel_addr)
+            count += register_map_undominated_use(&reg->idx[i].rel_addr->reg, ssa, block, origin_blocks);
+    }
+
+    return count;
+}
+
+/* Find the SSA values which have at least one use in a block not dominated by the block
+ * containing their definition, and run the SSA-to-temp materialisation helpers over the program.
+ *
+ * Drivers are not necessarily optimised to handle very large numbers of temps. For example,
+ * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV.
+ * This can also result in the backend emitting less code because temps typically need an
+ * access chain and a load/store.
+ *
+ * Returns VKD3D_OK on success, including when no undominated use exists and the program is
+ * left unmodified, or VKD3D_ERROR_OUT_OF_MEMORY if a work array cannot be allocated.
+ *
+ * NOTE(review): ssa.table starts zeroed and only flagged entries are set to UINT_MAX, while
+ * the function this replaces filled the whole table with 0xff. Confirm that the
+ * materialize_ssas_to_temps_process_*_param() helpers leave zero entries as SSA. */
+static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg)
+{
+    struct vsir_program *program = cfg->program;
+    struct vsir_block **origin_blocks;
+    struct ssa_allocation ssa = {0};
+    size_t i, count;
+    unsigned int j;
+
+    if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks))))
+    {
+        ERR("Failed to allocate origin block array.\n");
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    }
+    if (!(ssa.table = vkd3d_calloc(program->ssa_count, sizeof(*ssa.table))))
+    {
+        ERR("Failed to allocate SSA table.\n");
+        vkd3d_free(origin_blocks);
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    }
+
+    /* Record for each SSA id the block containing the instruction which writes it. */
+    for (i = 0; i < cfg->block_count; ++i)
+    {
+        struct vsir_block *block = &cfg->blocks[i];
+        struct vkd3d_shader_instruction *ins;
+
+        for (ins = block->begin; ins <= block->end; ++ins)
+        {
+            for (j = 0; j < ins->dst_count; ++j)
+            {
+                if (register_is_ssa(&ins->dst[j].reg))
+                    origin_blocks[ins->dst[j].reg.idx[0].offset] = block;
+            }
+        }
+    }
+
+    /* Flag every SSA value read in a block which its origin block does not dominate. */
+    for (i = 0, count = 0; i < cfg->block_count; ++i)
+    {
+        struct vsir_block *block = &cfg->blocks[i];
+        struct vkd3d_shader_instruction *ins;
+
+        for (ins = block->begin; ins <= block->end; ++ins)
+        {
+            for (j = 0; j < ins->src_count; ++j)
+                count += register_map_undominated_use(&ins->src[j].reg, &ssa, block, origin_blocks);
+        }
+    }
+
+    /* No undominated use was found, so nothing needs to be rewritten. */
+    if (!count)
+    {
+        vkd3d_free(origin_blocks);
+        vkd3d_free(ssa.table);
+        return VKD3D_OK;
+    }
+
+    /* Let the existing materialisation helpers process every instruction parameter. */
+    for (i = 0; i < program->instructions.count; ++i)
+    {
+        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+
+        for (j = 0; j < ins->dst_count; ++j)
+            materialize_ssas_to_temps_process_dst_param(program, &ssa, &ins->dst[j]);
+
+        for (j = 0; j < ins->src_count; ++j)
+            materialize_ssas_to_temps_process_src_param(program, &ssa, &ins->src[j]);
+    }
+
+    if (ssa.next_temp_idx)
+        TRACE("Emitting temps for %u values with undominated usage.\n", ssa.next_temp_idx);
+
+    program->temp_count += ssa.next_temp_idx;
+    vkd3d_free(origin_blocks);
+    vkd3d_free(ssa.table);
+
+    return VKD3D_OK;
+}
+
 enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags,
         const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
 {
@@ -4311,9 +4410,6 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
         if ((result = lower_switch_to_if_ladder(program)) < 0)
             return result;
 
-        if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0)
-            return result;
-
         if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
             return result;
 
@@ -4350,6 +4446,20 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
         }
 
         vsir_cfg_cleanup(&cfg);
+
+        if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
+            return result;
+
+        if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
+            return result;
+        vsir_cfg_compute_dominators(&cfg);
+
+        result = vsir_program_materialize_undominated_ssas_to_temps(&cfg);
+
+        vsir_cfg_cleanup(&cfg);
+
+        if (result < 0)
+            return result;
     }
     else
     {
@@ -4379,10 +4489,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
 
         if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0)
             return result;
-    }
 
-    if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
-        return result;
+        if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
+            return result;
+    }
 
     if (TRACE_ON())
         vkd3d_shader_trace(program);