Overall this is simpler, results in less verbose SPIR-V code which is easier to debug, and eliminates stuttering issues in Horizon Zero Dawn when run in an SM 6 dev branch with Gio's cfg4 structuriser branch.
-- v8: vkd3d-shader/ir: Materialise SSAs to temps before lowering switch instructions. vkd3d-shader/ir: Convert SSAs to temps only if the block of origin does not dominate all uses. vkd3d-shader/spirv: Handle uint2 to double bitcast in spirv_compiler_emit_mov(). vkd3d-shader/spirv: Emit a uint result for RESINFO_UINT if the dst register is SSA. vkd3d-shader/ir: Materialise phis to temps in the incoming blocks.
From: Conor McCarthy cmccarthy@codeweavers.com
RADV converts temps to phi instructions, so converting phis to MOVC in VSIR just translates back to phis feeding into a MOVC. This commit eliminates the MOVC. --- libs/vkd3d-shader/ir.c | 190 +++++++++++++---------------------------- 1 file changed, 57 insertions(+), 133 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 8af537390..86ca89c47 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2717,109 +2717,80 @@ static void materialize_ssas_to_temps_process_src_param(struct vsir_program *pro materialize_ssas_to_temps_process_reg(program, &src->reg); }
-static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, - unsigned int label) +struct ssas_to_temps_block_info { - unsigned int i; - - assert(ins->handler_idx == VKD3DSIH_PHI); - - for (i = 0; i < ins->src_count; i += 2) + struct phi_incoming_to_temp { - if (label_from_src_param(&ins->src[i + 1]) == label) - return &ins->src[i]; - } - - vkd3d_unreachable(); -} + struct vkd3d_shader_src_param *src; + struct vkd3d_shader_dst_param *dst; + } *incomings; + size_t incoming_capacity; + size_t incoming_count; +};
-static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program, - struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, - const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, - const struct vkd3d_shader_src_param *source, bool invert) +static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info, + size_t count) { - struct vkd3d_shader_src_param *src; - struct vkd3d_shader_dst_param *dst; - - if (!vsir_instruction_init_with_params(program, instruction, loc, - cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1)) - return false; - - dst = instruction->dst; - src = instruction->src; - - dst[0] = *dest; - materialize_ssas_to_temps_process_dst_param(program, &dst[0]); - - assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); - assert(dst[0].modifiers == 0); - assert(dst[0].shift == 0); + size_t i;
- if (cond) - { - src[0] = *cond; - src[1 + invert] = *source; - memset(&src[2 - invert], 0, sizeof(src[2 - invert])); - src[2 - invert].reg = dst[0].reg; - materialize_ssas_to_temps_process_src_param(program, &src[1]); - materialize_ssas_to_temps_process_src_param(program, &src[2]); - } - else - { - src[0] = *source; - materialize_ssas_to_temps_process_src_param(program, &src[0]); - } + for (i = 0; i < count; ++i) + vkd3d_free(block_info[i].incomings);
- return true; + vkd3d_free(block_info); }
static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) { + size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; + struct ssas_to_temps_block_info *info, *block_info = NULL; struct vkd3d_shader_instruction *instructions = NULL; - struct materialize_ssas_to_temps_block_data - { - size_t phi_begin; - size_t phi_count; - } *block_index = NULL; - size_t ins_capacity = 0, ins_count = 0, i; unsigned int current_label = 0;
- if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) - goto fail; - - if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index)))) + if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) { - ERR("Failed to allocate block index.\n"); + ERR("Failed to allocate block info array.\n"); goto fail; }
- for (i = 0; i < program->instructions.count; ++i) + for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + unsigned int j;
- switch (ins->handler_idx) + if (ins->handler_idx != VKD3DSIH_PHI) + continue; + ++phi_count; + + for (j = 0; j < ins->src_count; j += 2) { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); - break; + struct phi_incoming_to_temp *incoming; + unsigned int label;
- case VKD3DSIH_PHI: - assert(current_label != 0); - assert(i != 0); - if (block_index[current_label - 1].phi_begin == 0) - block_index[current_label - 1].phi_begin = i; - block_index[current_label - 1].phi_count += 1; - break; + label = label_from_src_param(&ins->src[j + 1]); + assert(label);
- default: - current_label = 0; - break; + info = &block_info[label - 1]; + + if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1, + sizeof(*info->incomings)))) + goto fail; + + incoming = &info->incomings[info->incoming_count++]; + incoming->src = &ins->src[j]; + incoming->dst = ins->dst; + + ++incoming_count; } + + materialize_ssas_to_temps_process_dst_param(program, ins->dst); }
+ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) + goto fail; + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i]; size_t j;
for (j = 0; j < ins->dst_count; ++j) @@ -2835,65 +2806,20 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog break;
case VKD3DSIH_BRANCH: - { - if (vsir_register_is_label(&ins->src[0].reg)) - { - const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1]; - - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count)) - goto fail; - - for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false)) - goto fail; + info = &block_info[current_label - 1];
- ++ins_count; - } - } - else + for (j = 0; j < info->incoming_count; ++j) { - struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1], - *data_false = &block_index[label_from_src_param(&ins->src[2]) - 1]; - const struct vkd3d_shader_src_param *cond = &ins->src[0]; + struct phi_incoming_to_temp *incoming = &info->incomings[j];
- if (!reserve_instructions(&instructions, &ins_capacity, - ins_count + data_true->phi_count + data_false->phi_count)) + mov_ins = &instructions[ins_count++]; + if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0)) goto fail; - - for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], cond, source, false)) - goto fail; - - ++ins_count; - } - - for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], cond, source, true)) - goto fail; - - ++ins_count; - } + *mov_ins->dst = *incoming->dst; + mov_ins->src = incoming->src; + mov_ins->src_count = 1; } break; - }
case VKD3DSIH_PHI: continue; @@ -2902,25 +2828,23 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog break; }
- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) - goto fail; - instructions[ins_count++] = *ins; }
vkd3d_free(program->instructions.elements); - vkd3d_free(block_index); program->instructions.elements = instructions; program->instructions.capacity = ins_capacity; program->instructions.count = ins_count; program->temp_count += program->ssa_count; program->ssa_count = 0;
+ ssas_to_temps_block_info_cleanup(block_info, program->block_count); + return VKD3D_OK;
fail: vkd3d_free(instructions); - vkd3d_free(block_index); + ssas_to_temps_block_info_cleanup(block_info, program->block_count);
return VKD3D_ERROR_OUT_OF_MEMORY; }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 46130244c..b70bc30e7 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -9334,6 +9334,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, lod_id, val_id, miplevel_count_id; + enum vkd3d_shader_component_type component_type; uint32_t constituents[VKD3D_VEC4_SIZE]; unsigned int i, size_component_count; struct vkd3d_shader_image image; @@ -9370,10 +9371,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, i + 2);
+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); if (instruction->flags == VKD3DSI_RESINFO_UINT) { - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + /* SSA registers must match the specified result type. */ + if (!register_is_ssa(&dst->reg)) + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + else + component_type = VKD3D_SHADER_COMPONENT_UINT; } else { @@ -9382,7 +9389,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); } val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, - VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); + component_type, src[1].swizzle, dst->write_mask);
spirv_compiler_emit_store_dst(compiler, dst, val_id); }
From: Conor McCarthy cmccarthy@codeweavers.com
Necessary for MakeDouble if the dst is SSA. --- libs/vkd3d-shader/spirv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index b70bc30e7..78c95bde8 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -7283,8 +7283,12 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, }
general_implementation: - write_mask = (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) - ? vsir_write_mask_64_from_32(dst->write_mask) : dst->write_mask; + write_mask = dst->write_mask; + if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_64_from_32(write_mask); + else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_32_from_64(write_mask); + val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); if (dst->reg.data_type != src->reg.data_type) {
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/ir.c | 187 ++++++++++++++++++++++++++++++++++------- 1 file changed, 157 insertions(+), 30 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 86ca89c47..82d7f9a33 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2685,36 +2685,43 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; }
-static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, - struct vkd3d_shader_src_param *src); +struct ssas_to_temps_alloc +{ + unsigned int *table; + unsigned int next_temp_idx; +}; + +static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count) +{ + size_t i = ssa_count * sizeof(*alloc->table); + + if (!(alloc->table = vkd3d_malloc(i))) + { + ERR("Failed to allocate SSA table.\n"); + return false; + } + memset(alloc->table, 0xff, i); + + alloc->next_temp_idx = temp_count; + return true; +}
/* This is idempotent: it can be safely applied more than once on the * same register. */ -static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) +static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc, + struct vkd3d_shader_register *reg) { unsigned int i;
- if (reg->type == VKD3DSPR_SSA) + if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX) { reg->type = VKD3DSPR_TEMP; - reg->idx[0].offset += program->temp_count; + reg->idx[0].offset = alloc->table[reg->idx[0].offset]; }
for (i = 0; i < reg->idx_count; ++i) if (reg->idx[i].rel_addr) - materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr); -} - -static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, - struct vkd3d_shader_dst_param *dst) -{ - materialize_ssas_to_temps_process_reg(program, &dst->reg); -} - -static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, - struct vkd3d_shader_src_param *src) -{ - materialize_ssas_to_temps_process_reg(program, &src->reg); + materialize_ssas_to_temps_process_reg(program, alloc, ®->idx[i].rel_addr->reg); }
struct ssas_to_temps_block_info @@ -2739,11 +2746,12 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl vkd3d_free(block_info); }
-static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) +static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) { size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; struct ssas_to_temps_block_info *info, *block_info = NULL; struct vkd3d_shader_instruction *instructions = NULL; + struct ssas_to_temps_alloc alloc = {0}; unsigned int current_label = 0;
if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) @@ -2752,15 +2760,22 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog goto fail; }
+ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) + goto fail; + for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - unsigned int j; + unsigned int j, temp_idx;
+ /* Only phi src/dst SSA values need be converted here. Structurisation may + * introduce new cases of undominated SSA use, which will be handled later. */ if (ins->handler_idx != VKD3DSIH_PHI) continue; ++phi_count;
+ temp_idx = alloc.next_temp_idx++; + for (j = 0; j < ins->src_count; j += 2) { struct phi_incoming_to_temp *incoming; @@ -2779,12 +2794,17 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog incoming->src = &ins->src[j]; incoming->dst = ins->dst;
+ alloc.table[ins->dst->reg.idx[0].offset] = temp_idx; + ++incoming_count; }
- materialize_ssas_to_temps_process_dst_param(program, ins->dst); + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg); }
+ if (!phi_count) + goto done; + if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) goto fail;
@@ -2794,10 +2814,10 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog size_t j;
for (j = 0; j < ins->dst_count; ++j) - materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]); + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_src_param(program, &ins->src[j]); + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
switch (ins->handler_idx) { @@ -2835,16 +2855,17 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog program->instructions.elements = instructions; program->instructions.capacity = ins_capacity; program->instructions.count = ins_count; - program->temp_count += program->ssa_count; - program->ssa_count = 0; - + program->temp_count = alloc.next_temp_idx; +done: ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(alloc.table);
return VKD3D_OK;
fail: vkd3d_free(instructions); ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(alloc.table);
return VKD3D_ERROR_OUT_OF_MEMORY; } @@ -4513,6 +4534,98 @@ fail: return ret; }
+static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc, + struct vsir_block *block, struct vsir_block **origin_blocks) +{ + unsigned int i; + + if (!register_is_ssa(reg)) + return; + + i = reg->idx[0].offset; + if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) + alloc->table[i] = alloc->next_temp_idx++; + + for (i = 0; i < reg->idx_count; ++i) + if (reg->idx[i].rel_addr) + register_map_undominated_use(®->idx[i].rel_addr->reg, alloc, block, origin_blocks); +} + +/* Drivers are not necessarily optimised to handle very large numbers of temps. For example, + * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV. + * This can also result in the backend emitting less code because temps typically need an + * access chain and a load/store. Conversion of phi SSA values to temps should eliminate all + * undominated SSA use, but structurisation may create new occurrences. */ +static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) +{ + struct vsir_program *program = cfg->program; + struct ssas_to_temps_alloc alloc = {0}; + struct vsir_block **origin_blocks; + unsigned int j; + size_t i; + + if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks)))) + { + ERR("Failed to allocate origin block array.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) + { + vkd3d_free(origin_blocks); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; + + for (ins = block->begin; ins <= block->end; ++ins) + { + for (j = 0; j < ins->dst_count; ++j) + { + if (register_is_ssa(&ins->dst[j].reg)) + origin_blocks[ins->dst[j].reg.idx[0].offset] = block; + } + } + } + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; + + for (ins = block->begin; ins <= block->end; ++ins) + { + for (j = 0; j < ins->src_count; ++j) + register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); + } + } + + if (alloc.next_temp_idx == program->temp_count) + goto done; + + TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); + + for (i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + for (j = 0; j < ins->dst_count; ++j) + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); + + for (j = 0; j < ins->src_count; ++j) + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); + } + + program->temp_count = alloc.next_temp_idx; +done: + vkd3d_free(origin_blocks); + vkd3d_free(alloc.table); + + return VKD3D_OK; +} + struct validation_context { struct vkd3d_shader_message_context *message_context; @@ -5395,7 +5508,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t if ((result = lower_switch_to_if_ladder(program)) < 0) return result;
- if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) + if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) return result;
if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) @@ -5440,6 +5553,20 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t }
vsir_cfg_cleanup(&cfg); + + if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; + + if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) + return result; + vsir_cfg_compute_dominators(&cfg); + + result = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); + + vsir_cfg_cleanup(&cfg); + + if (result < 0) + return result; } else { @@ -5469,10 +5596,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) return result; - }
- if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; + if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; + }
if (TRACE_ON()) vkd3d_shader_trace(program);
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/ir.c | 96 ++---------------------------------------- 1 file changed, 3 insertions(+), 93 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 82d7f9a33..37278e81c 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2577,97 +2577,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) } }
- /* Second subpass: creating new blocks might have broken - * references in PHI instructions, so we use the block map to fix - * them. */ - current_label = 0; - for (i = 0; i < ins_count; ++i) - { - struct vkd3d_shader_instruction *ins = &instructions[i]; - struct vkd3d_shader_src_param *new_src; - unsigned int j, l, new_src_count = 0; - - switch (ins->handler_idx) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); - continue; - - case VKD3DSIH_PHI: - break; - - default: - continue; - } - - /* First count how many source parameters we need. */ - for (j = 0; j < ins->src_count; j += 2) - { - unsigned int source_label = label_from_src_param(&ins->src[j + 1]); - size_t k, match_count = 0; - - for (k = 0; k < map_count; ++k) - { - struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; - - if (mapping->switch_label == source_label && mapping->target_label == current_label) - match_count += 1; - } - - new_src_count += (match_count != 0) ? 2 * match_count : 2; - } - - assert(new_src_count >= ins->src_count); - - /* Allocate more source parameters if needed. */ - if (new_src_count == ins->src_count) - { - new_src = ins->src; - } - else - { - if (!(new_src = vsir_program_get_src_params(program, new_src_count))) - { - ERR("Failed to allocate %u source parameters.\n", new_src_count); - goto fail; - } - } - - /* Then do the copy. */ - for (j = 0, l = 0; j < ins->src_count; j += 2) - { - unsigned int source_label = label_from_src_param(&ins->src[j + 1]); - size_t k, match_count = 0; - - for (k = 0; k < map_count; ++k) - { - struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; - - if (mapping->switch_label == source_label && mapping->target_label == current_label) - { - match_count += 1; - - new_src[l] = ins->src[j]; - new_src[l + 1] = ins->src[j + 1]; - new_src[l + 1].reg.idx[0].offset = mapping->if_label; - l += 2; - } - } - - if (match_count == 0) - { - new_src[l] = ins->src[j]; - new_src[l + 1] = ins->src[j + 1]; - l += 2; - } - } - - assert(l == new_src_count); - - ins->src_count = new_src_count; - ins->src = new_src; - } - vkd3d_free(program->instructions.elements); vkd3d_free(block_map); program->instructions.elements = instructions; @@ -2826,6 +2735,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ break;
case VKD3DSIH_BRANCH: + case VKD3DSIH_SWITCH_MONOLITHIC: info = &block_info[current_label - 1];
for (j = 0; j < info->incoming_count; ++j) @@ -5505,10 +5415,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t { struct vsir_cfg cfg;
- if ((result = lower_switch_to_if_ladder(program)) < 0) + if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) return result;
- if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) + if ((result = lower_switch_to_if_ladder(program)) < 0) return result;
if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
On Fri Apr 5 00:14:12 2024 +0000, Conor McCarthy wrote:
changed this line in [version 7 of the diff](/wine/vkd3d/-/merge_requests/749/diffs?diff_id=108691&start_sha=f25812a8b0fe5df41d14ba0735b05f7d19df9ea2#4fae87cf1e8d06a5c7871f244e3dd75ef2dac299_2750_2751)
I changed it. Some passes, e.g. remapping temp indices, would break on sharing of params, and we're more likely to do something like that than modifying the details of some MOVs.
On Fri Apr 5 00:18:16 2024 +0000, Conor McCarthy wrote:
I changed it. Some passes, e.g. remapping temp indices, would break on sharing of params, and we're more likely to do something like that than modifying the details of some MOVs.
Could you copy the source as well? I would like to assume anything at all about what later passes will do on the program. For example, suppose that we have a program like this: ``` add sr3, sr2, |sr1| ... add sr5, sr4, |sr1| ```
Now imagine that we later introduce a pass that wants to rewrite the abs operation as a separate operator. It should be legitimate to change the above code to: ``` add sr3, sr2, |sr1| ... abs sr6, sr1 add sr5, sr4, sr6 ```
If the two occurrences of `|sr1|` happen to be pointers to the same object, the code would actually become this, which is invalid: ``` add sr3, sr2, sr6 ... abs sr6, sr1 add sr5, sr4, sr6 ```
Of course it might be pointed out that this pass might as well move the abs operation above the first `sr1` usage, since if you're changing one of them you probably want to change all of them, but my point is that I wouldn't want to assume it. The change I said is sound and a pass should legitimately be able to do it without making the program invalid. For this to be reasonably ensured, there must be no aliasing between different parameters.
In other words, assuming they are unrelated would itself be an error.
This is a concern for any other pass that touches that parameter. And, as I said, the fact that two parameters are related doesn't mean you'll want to do the same changes on all of them.
On Fri Apr 5 12:22:58 2024 +0000, Giovanni Mascellani wrote:
Could you copy the source as well? I would like to assume anything at all about what later passes will do on the program. For example, suppose that we have a program like this:
add sr3, sr2, |sr1| ... add sr5, sr4, |sr1|
Now imagine that we later introduce a pass that wants to rewrite the abs operation as a separate operator. It should be legitimate to change the above code to:
add sr3, sr2, |sr1| ... abs sr6, sr1 add sr5, sr4, sr6
If the two occurrences of `|sr1|` happen to be pointers to the same object, the code would actually become this, which is invalid:
add sr3, sr2, sr6 ... abs sr6, sr1 add sr5, sr4, sr6
Of course it might be pointed out that this pass might as well move the abs operation above the first `sr1` usage, since if you're changing one of them you probably want to change all of them, but my point is that I wouldn't want to assume it. The change I said is sound and a pass should legitimately be able to do it without making the program invalid. For this to be reasonably ensured, there must be no aliasing between different parameters.
In other words, assuming they are unrelated would itself be an error.
This is a concern for any other pass that touches that parameter. And, as I said, the fact that two parameters are related doesn't mean you'll want to do the same changes on all of them.
The sources were allocated as part of the phi instruction, so they are not shared once the phi is deleted.
This merge request was approved by Giovanni Mascellani.
This merge request was approved by Henri Verbeet.