Overall this is simpler, results in less verbose SPIR-V code which is easier to debug, and eliminates stuttering issues in Horizon Zero Dawn when run in an SM 6 dev branch with Gio's cfg4 structuriser branch.
From: Conor McCarthy cmccarthy@codeweavers.com
RADV converts temps to phi instructions, so converting phis to MOVC in VSIR just translates back to phis feeding into a MOVC. This commit eliminates the MOVC. --- libs/vkd3d-shader/ir.c | 383 +++++++++++++++-------------------------- 1 file changed, 142 insertions(+), 241 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 4f0226187..b7348b4f1 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2577,97 +2577,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) } }
- /* Second subpass: creating new blocks might have broken - * references in PHI instructions, so we use the block map to fix - * them. */ - current_label = 0; - for (i = 0; i < ins_count; ++i) - { - struct vkd3d_shader_instruction *ins = &instructions[i]; - struct vkd3d_shader_src_param *new_src; - unsigned int j, l, new_src_count = 0; - - switch (ins->handler_idx) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); - continue; - - case VKD3DSIH_PHI: - break; - - default: - continue; - } - - /* First count how many source parameters we need. */ - for (j = 0; j < ins->src_count; j += 2) - { - unsigned int source_label = label_from_src_param(&ins->src[j + 1]); - size_t k, match_count = 0; - - for (k = 0; k < map_count; ++k) - { - struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; - - if (mapping->switch_label == source_label && mapping->target_label == current_label) - match_count += 1; - } - - new_src_count += (match_count != 0) ? 2 * match_count : 2; - } - - assert(new_src_count >= ins->src_count); - - /* Allocate more source parameters if needed. */ - if (new_src_count == ins->src_count) - { - new_src = ins->src; - } - else - { - if (!(new_src = vsir_program_get_src_params(program, new_src_count))) - { - ERR("Failed to allocate %u source parameters.\n", new_src_count); - goto fail; - } - } - - /* Then do the copy. */ - for (j = 0, l = 0; j < ins->src_count; j += 2) - { - unsigned int source_label = label_from_src_param(&ins->src[j + 1]); - size_t k, match_count = 0; - - for (k = 0; k < map_count; ++k) - { - struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; - - if (mapping->switch_label == source_label && mapping->target_label == current_label) - { - match_count += 1; - - new_src[l] = ins->src[j]; - new_src[l + 1] = ins->src[j + 1]; - new_src[l + 1].reg.idx[0].offset = mapping->if_label; - l += 2; - } - } - - if (match_count == 0) - { - new_src[l] = ins->src[j]; - new_src[l + 1] = ins->src[j + 1]; - l += 2; - } - } - - assert(l == new_src_count); - - ins->src_count = new_src_count; - ins->src = new_src; - } - vkd3d_free(program->instructions.elements); vkd3d_free(block_map); program->instructions.elements = instructions; @@ -2685,148 +2594,151 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; }
-static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, +struct ssa_allocation +{ + unsigned int *table; + unsigned int next_temp_idx; +}; + +static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, struct ssa_allocation *ssa, struct vkd3d_shader_src_param *src);
/* This is idempotent: it can be safely applied more than once on the * same register. */ -static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) +static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssa_allocation *ssa, + struct vkd3d_shader_register *reg) { unsigned int i;
- if (reg->type == VKD3DSPR_SSA) + if (reg->type == VKD3DSPR_SSA && ssa->table[reg->idx[0].offset]) { reg->type = VKD3DSPR_TEMP; - reg->idx[0].offset += program->temp_count; + if (ssa->table[reg->idx[0].offset] == UINT_MAX) + { + i = program->temp_count + ssa->next_temp_idx++; + ssa->table[reg->idx[0].offset] = i + 1; + reg->idx[0].offset = i; + } + else + { + reg->idx[0].offset = ssa->table[reg->idx[0].offset] - 1; + } }
for (i = 0; i < reg->idx_count; ++i) if (reg->idx[i].rel_addr) - materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr); + materialize_ssas_to_temps_process_src_param(program, ssa, reg->idx[i].rel_addr); }
-static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, +static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, struct ssa_allocation *ssa, struct vkd3d_shader_dst_param *dst) { - materialize_ssas_to_temps_process_reg(program, &dst->reg); + materialize_ssas_to_temps_process_reg(program, ssa, &dst->reg); }
-static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, +static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, struct ssa_allocation *ssa, struct vkd3d_shader_src_param *src) { - materialize_ssas_to_temps_process_reg(program, &src->reg); + materialize_ssas_to_temps_process_reg(program, ssa, &src->reg); }
-static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, - unsigned int label) +struct materialize_ssas_to_temps_block_info { - unsigned int i; - - assert(ins->handler_idx == VKD3DSIH_PHI); - - for (i = 0; i < ins->src_count; i += 2) + struct phi_incoming_to_temp { - if (label_from_src_param(&ins->src[i + 1]) == label) - return &ins->src[i]; - } - - vkd3d_unreachable(); -} + struct vkd3d_shader_src_param *src; + struct vkd3d_shader_dst_param *dst; + unsigned int temp_idx; + } *incomings; + size_t incoming_capacity; + size_t incoming_count; +};
-static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program, - struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, - const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, - const struct vkd3d_shader_src_param *source, bool invert) +static void materialize_ssas_to_temps_block_info_cleanup(struct materialize_ssas_to_temps_block_info *block_info, + size_t count) { - struct vkd3d_shader_src_param *src; - struct vkd3d_shader_dst_param *dst; - - if (!vsir_instruction_init_with_params(program, instruction, loc, - cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1)) - return false; - - dst = instruction->dst; - src = instruction->src; - - dst[0] = *dest; - materialize_ssas_to_temps_process_dst_param(program, &dst[0]); - - assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); - assert(dst[0].modifiers == 0); - assert(dst[0].shift == 0); + size_t i;
- if (cond) - { - src[0] = *cond; - src[1 + invert] = *source; - memset(&src[2 - invert], 0, sizeof(src[2 - invert])); - src[2 - invert].reg = dst[0].reg; - materialize_ssas_to_temps_process_src_param(program, &src[1]); - materialize_ssas_to_temps_process_src_param(program, &src[2]); - } - else - { - src[0] = *source; - materialize_ssas_to_temps_process_src_param(program, &src[0]); - } + for (i = 0; i < count; ++i) + vkd3d_free(block_info[i].incomings);
- return true; + vkd3d_free(block_info); }
-static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) +static enum vkd3d_result vsir_program_materialize_phis_to_temps(struct vsir_program *program) { + struct materialize_ssas_to_temps_block_info *info, *block_info = NULL; + size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; struct vkd3d_shader_instruction *instructions = NULL; - struct materialize_ssas_to_temps_block_data - { - size_t phi_begin; - size_t phi_count; - } *block_index = NULL; - size_t ins_capacity = 0, ins_count = 0, i; + struct ssa_allocation ssa = {0}; unsigned int current_label = 0;
- if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) + if (!(ssa.table = vkd3d_calloc(program->ssa_count, sizeof(*ssa.table)))) + { + ERR("Failed to allocate SSA table.\n"); goto fail; - - if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index)))) + } + if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) { - ERR("Failed to allocate block index.\n"); + ERR("Failed to allocate block info array.\n"); goto fail; }
- for (i = 0; i < program->instructions.count; ++i) + for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + unsigned int j, temp_idx;
- switch (ins->handler_idx) + if (ins->handler_idx != VKD3DSIH_PHI) + continue; + ++phi_count; + + temp_idx = program->temp_count + ssa.next_temp_idx++; + + for (j = 0; j < ins->src_count; j += 2) { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); - break; + struct phi_incoming_to_temp *incoming; + unsigned int label;
- case VKD3DSIH_PHI: - assert(current_label != 0); - assert(i != 0); - if (block_index[current_label - 1].phi_begin == 0) - block_index[current_label - 1].phi_begin = i; - block_index[current_label - 1].phi_count += 1; - break; + label = label_from_src_param(&ins->src[j + 1]); + assert(label);
- default: - current_label = 0; - break; + info = &block_info[label - 1]; + + if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1, + sizeof(*info->incomings)))) + goto fail; + + incoming = &info->incomings[info->incoming_count++]; + incoming->src = &ins->src[j]; + incoming->dst = ins->dst; + incoming->temp_idx = temp_idx; + + ssa.table[ins->dst->reg.idx[0].offset] = temp_idx + 1; + + ++incoming_count; } }
+ if (!incoming_count) + { + materialize_ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(ssa.table); + return VKD3D_OK; + } + + if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) + goto fail; + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i]; size_t j;
- for (j = 0; j < ins->dst_count; ++j) - materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]); - for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_src_param(program, &ins->src[j]); + materialize_ssas_to_temps_process_src_param(program, &ssa, &ins->src[j]); + for (j = 0; j < ins->dst_count; ++j) + materialize_ssas_to_temps_process_dst_param(program, &ssa, &ins->dst[j]);
switch (ins->handler_idx) { @@ -2835,65 +2747,23 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog break;
case VKD3DSIH_BRANCH: - { - if (vsir_register_is_label(&ins->src[0].reg)) - { - const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1]; - - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count)) - goto fail; - - for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false)) - goto fail; + case VKD3DSIH_RET: + case VKD3DSIH_SWITCH_MONOLITHIC: + info = &block_info[current_label - 1];
- ++ins_count; - } - } - else + for (j = 0; j < info->incoming_count; ++j) { - struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1], - *data_false = &block_index[label_from_src_param(&ins->src[2]) - 1]; - const struct vkd3d_shader_src_param *cond = &ins->src[0]; - - if (!reserve_instructions(&instructions, &ins_capacity, - ins_count + data_true->phi_count + data_false->phi_count)) - goto fail; - - for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], cond, source, false)) - goto fail; - - ++ins_count; - } - - for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], cond, source, true)) - goto fail; - - ++ins_count; - } + struct phi_incoming_to_temp *incoming = &info->incomings[j]; + + mov_ins = &instructions[ins_count++]; + vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0); + mov_ins->src = incoming->src; + mov_ins->src_count = 1; + *mov_ins->dst = *incoming->dst; + mov_ins->dst->reg.type = VKD3DSPR_TEMP; + mov_ins->dst->reg.idx[0].offset = incoming->temp_idx; } break; - }
case VKD3DSIH_PHI: continue; @@ -2902,29 +2772,57 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog break; }
- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) - goto fail; - instructions[ins_count++] = *ins; }
vkd3d_free(program->instructions.elements); - vkd3d_free(block_index); program->instructions.elements = instructions; program->instructions.capacity = ins_capacity; program->instructions.count = ins_count; - program->temp_count += program->ssa_count; - program->ssa_count = 0; + program->temp_count += ssa.next_temp_idx; + + materialize_ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(ssa.table);
return VKD3D_OK;
fail: vkd3d_free(instructions); - vkd3d_free(block_index); + materialize_ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(ssa.table);
return VKD3D_ERROR_OUT_OF_MEMORY; }
+static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) +{ + struct ssa_allocation ssa = {0}; + size_t i; + + if (!(ssa.table = vkd3d_calloc(program->ssa_count, sizeof(*ssa.table)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(ssa.table, 0xff, program->ssa_count * sizeof(*ssa.table)); + + for (i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + size_t j; + + for (j = 0; j < ins->dst_count; ++j) + materialize_ssas_to_temps_process_dst_param(program, &ssa, &ins->dst[j]); + + for (j = 0; j < ins->src_count; ++j) + materialize_ssas_to_temps_process_src_param(program, &ssa, &ins->src[j]); + } + + program->temp_count += ssa.next_temp_idx; + program->ssa_count = 0; + + vkd3d_free(ssa.table); + + return VKD3D_OK; +} + struct vsir_block_list { struct vsir_block **blocks; @@ -4407,6 +4305,9 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t { struct vsir_cfg cfg;
+ if ((result = vsir_program_materialize_phis_to_temps(program)) < 0) + return result; + if ((result = lower_switch_to_if_ladder(program)) < 0) return result;
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 46130244c..b70bc30e7 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -9334,6 +9334,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, lod_id, val_id, miplevel_count_id; + enum vkd3d_shader_component_type component_type; uint32_t constituents[VKD3D_VEC4_SIZE]; unsigned int i, size_component_count; struct vkd3d_shader_image image; @@ -9370,10 +9371,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, i + 2);
+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); if (instruction->flags == VKD3DSI_RESINFO_UINT) { - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + /* SSA registers must match the specified result type. */ + if (!register_is_ssa(&dst->reg)) + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + else + component_type = VKD3D_SHADER_COMPONENT_UINT; } else { @@ -9382,7 +9389,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); } val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, - VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); + component_type, src[1].swizzle, dst->write_mask);
spirv_compiler_emit_store_dst(compiler, dst, val_id); }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/ir.c | 154 +++++++++++++++++++++++++++++++---------- 1 file changed, 119 insertions(+), 35 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index b7348b4f1..e94a8dfea 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2794,35 +2794,6 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; }
-static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) -{ - struct ssa_allocation ssa = {0}; - size_t i; - - if (!(ssa.table = vkd3d_calloc(program->ssa_count, sizeof(*ssa.table)))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(ssa.table, 0xff, program->ssa_count * sizeof(*ssa.table)); - - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - size_t j; - - for (j = 0; j < ins->dst_count; ++j) - materialize_ssas_to_temps_process_dst_param(program, &ssa, &ins->dst[j]); - - for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_src_param(program, &ssa, &ins->src[j]); - } - - program->temp_count += ssa.next_temp_idx; - program->ssa_count = 0; - - vkd3d_free(ssa.table); - - return VKD3D_OK; -} - struct vsir_block_list { struct vsir_block **blocks; @@ -4291,6 +4262,108 @@ fail: return ret; }
+static size_t register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssa_allocation *ssa, + struct vsir_block *block, struct vsir_block **origin_blocks) +{ + unsigned int ssa_id, i; + size_t count = 0; + + if (!register_is_ssa(reg)) + return 0; + + ssa_id = reg->idx[0].offset; + if (!vsir_block_dominates(origin_blocks[ssa_id], block)) + { + ssa->table[ssa_id] = UINT_MAX; + ++count; + } + + for (i = 0; i < reg->idx_count; ++i) + if (reg->idx[i].rel_addr) + count += register_map_undominated_use(®->idx[i].rel_addr->reg, ssa, block, origin_blocks); + + return count; +} + +/* Drivers are not necessarily optimised to handle very large numbers of temps. For example, + * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV. + * This can also result in the backend emitting less code because temps typically need an + * access chain and a load/store. */ +static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) +{ + struct vsir_program *program = cfg->program; + struct vsir_block **origin_blocks; + struct ssa_allocation ssa = {0}; + size_t i, count; + unsigned int j; + + if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks)))) + { + ERR("Failed to allocate origin block array.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + if (!(ssa.table = vkd3d_calloc(program->ssa_count, sizeof(*ssa.table)))) + { + ERR("Failed to allocate SSA table.\n"); + vkd3d_free(origin_blocks); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; + + for (ins = block->begin; ins <= block->end; ++ins) + { + for (j = 0; j < ins->dst_count; ++j) + { + if (register_is_ssa(&ins->dst[j].reg)) + origin_blocks[ins->dst[j].reg.idx[0].offset] = block; + } + } + } + + for (i = 0, count = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; + + for (ins = block->begin; ins <= block->end; ++ins) + { + for (j = 0; j < ins->src_count; ++j) + count += register_map_undominated_use(&ins->src[j].reg, &ssa, block, origin_blocks); + } + } + + if (!count) + { + vkd3d_free(origin_blocks); + vkd3d_free(ssa.table); + return VKD3D_OK; + } + + for (i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + for (j = 0; j < ins->dst_count; ++j) + materialize_ssas_to_temps_process_dst_param(program, &ssa, &ins->dst[j]); + + for (j = 0; j < ins->src_count; ++j) + materialize_ssas_to_temps_process_src_param(program, &ssa, &ins->src[j]); + } + + if (ssa.next_temp_idx) + TRACE("Emitting temps for %u values with undominated usage.\n", ssa.next_temp_idx); + + program->temp_count += ssa.next_temp_idx; + vkd3d_free(origin_blocks); + vkd3d_free(ssa.table); + + return VKD3D_OK; +} + enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { @@ -4311,9 +4384,6 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t if ((result = lower_switch_to_if_ladder(program)) < 0) return result;
- if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) - return result; - if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) return result;
@@ -4350,6 +4420,20 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t }
vsir_cfg_cleanup(&cfg); + + if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; + + if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) + return result; + vsir_cfg_compute_dominators(&cfg); + + result = vsir_program_materialize_undominated_ssas_to_temps(&cfg); + + vsir_cfg_cleanup(&cfg); + + if (result < 0) + return result; } else { @@ -4379,10 +4463,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) return result; - }
- if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; + if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; + }
if (TRACE_ON()) vkd3d_shader_trace(program);
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
- else
- {
src[0] = *source;
materialize_ssas_to_temps_process_src_param(program, &src[0]);
- }
- for (i = 0; i < count; ++i)
vkd3d_free(block_info[i].incomings);
- return true;
- vkd3d_free(block_info);
}
-static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) +static enum vkd3d_result vsir_program_materialize_phis_to_temps(struct vsir_program *program) {
- struct materialize_ssas_to_temps_block_info *info, *block_info = NULL;
This should probably be renamed now. Also, please keep the `materialize_ssas_to_temps` stuff contiguous, moving the new `materialize_phis_to_temps` above all of it.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
reg->idx[0].offset = i;
}
else
{
reg->idx[0].offset = ssa->table[reg->idx[0].offset] - 1;
}
}
for (i = 0; i < reg->idx_count; ++i) if (reg->idx[i].rel_addr)
materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr);
materialize_ssas_to_temps_process_src_param(program, ssa, reg->idx[i].rel_addr);
}
-static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, +static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, struct ssa_allocation *ssa,
For the record, I am not particularly attached to these two helpers. I introduced them at some point when I believed that something specific for source and destination parameters had to be done, but they look pretty useless now. If you want to take advantage of the churn to remove them along, no objections on my side.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
- unsigned int next_temp_idx;
+};
+static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, struct ssa_allocation *ssa, struct vkd3d_shader_src_param *src);
/* This is idempotent: it can be safely applied more than once on the
- same register. */
-static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) +static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssa_allocation *ssa,
struct vkd3d_shader_register *reg)
{ unsigned int i;
- if (reg->type == VKD3DSPR_SSA)
- if (reg->type == VKD3DSPR_SSA && ssa->table[reg->idx[0].offset])
Given that we're using `UINT_MAX` as a special value anyway, could we use it to mark an entry that shouldn't be translated rather than zero? This has the advantage that you don't need to fixup indices, and you can only introduce this convention in the commit that really uses it.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j)
{
const struct vkd3d_shader_src_param *source;
source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j],
current_label);
if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count],
&ins->location, &program->instructions.elements[j].dst[0], cond, source, true))
goto fail;
++ins_count;
}
struct phi_incoming_to_temp *incoming = &info->incomings[j];
mov_ins = &instructions[ins_count++];
vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0);
This can fail.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
{ struct vsir_cfg cfg;
if ((result = vsir_program_materialize_phis_to_temps(program)) < 0)
return result;
This commit seems to be doing two different things: * generating MOV instead of MOVC when converting PHIs (which is sensible, I was overly conservative originally with MOVC); * splitting processing PHIs from processing all the other instructions.
Could you please split these changes? For the first one something list [this patch](/uploads/c6874604d67a9e6492a8463c869944a5/patch.diff) should be enough.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
}
- }
- for (i = 0, count = 0; i < cfg->block_count; ++i)
- {
struct vsir_block *block = &cfg->blocks[i];
struct vkd3d_shader_instruction *ins;
for (ins = block->begin; ins <= block->end; ++ins)
{
for (j = 0; j < ins->src_count; ++j)
count += register_map_undominated_use(&ins->src[j].reg, &ssa, block, origin_blocks);
}
- }
- if (!count)
You don't really need to keep the whole count, right? You just need to know whether you have at least one to optimize the last pass away.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
ssa->table[ssa_id] = UINT_MAX;
++count;
- }
- for (i = 0; i < reg->idx_count; ++i)
if (reg->idx[i].rel_addr)
count += register_map_undominated_use(®->idx[i].rel_addr->reg, ssa, block, origin_blocks);
- return count;
+}
+/* Drivers are not necessarily optimised to handle very large numbers of temps. For example,
- using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV.
- This can also result in the backend emitting less code because temps typically need an
- access chain and a load/store. */
+static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg)
I guess this should have a `vsir_cfg_` prefix, to fit with the convention we're (slowly) moving to.
A few comments, but this looks really useful.
I don't totally like the fact that at some point during the pipeline the VSIR code is technically illegal (because it has SSA registers without satisfying the SSA rules). Ideally I'd like the code to validate at each stage, which might become helpful for debugging at some point. I won't block development on this, but eventually I'll try to look for better solutions.