This applies on top of !711; the last three commits belong here.
This MR and the following ones will introduce a number of optimization passes on the structured representation of the shader, with the goal of fixing the idiosyncrasies of the code generated by the new structurizer. The general pattern is that we want to recognize when a combination of loops and jumps can instead be written with selection constructs. Ideally that should lead to removing all the synthesized loop intervals, but that cannot be guaranteed in general. We still want to remove all the loops we can: first, to make the generated code easier to read and to recompile; and second, because having fewer loops also means that more multilevel jumps become ordinary single-level jumps, which can be represented in SPIR-V without additional overhead.
-- v2:
vkd3d-shader/ir: Synthesize selection constructs from conditional jumps.
vkd3d-shader/ir: Remove trailing `continue's.
vkd3d-shader/ir: Move `continue's to the false branch when possible.
vkd3d-shader/ir: Emit multilevel jumps in the structured program.
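To make the intent concrete, here is a rough C analogue (purely illustrative, not vkd3d IR) of the kind of rewrite these passes aim for: a synthesized loop that is only ever exited through a conditional break expresses the same control flow as a plain selection construct.

/* Purely illustrative C analogue, not vkd3d IR: a synthesized loop that is
 * only ever exited through a conditional break expresses the same control
 * flow as a plain selection construct. */
#include <stdio.h>

static int with_loop(int cond)
{
    int value = 0;

    do
    {
        if (cond)
            break;      /* conditional jump out of the synthesized loop */
        value = 1;      /* remainder of the loop body */
    } while (0);

    return value;
}

static int with_selection(int cond)
{
    int value = 0;

    if (!cond)          /* same behaviour, no loop construct required */
        value = 1;

    return value;
}

int main(void)
{
    printf("%d %d\n", with_loop(0), with_selection(0));
    printf("%d %d\n", with_loop(1), with_selection(1));
    return 0;
}

In the IR the same idea applies to the loop intervals synthesized by the structurizer: once the conditional jump is expressed as a selection, the enclosing loop often becomes removable.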
From: Giovanni Mascellani <gmascellani@codeweavers.com>
Multilevel jumps are not supported yet, and trigger a fallback to the simple structurizer.
---
 libs/vkd3d-shader/ir.c | 150 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 147 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 55d121646..9548fb5b0 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3283,6 +3283,9 @@ struct vsir_cfg size_t loop_interval_count, loop_interval_capacity;
struct vsir_cfg_structure_list structured_program; + + struct vkd3d_shader_instruction *instructions; + size_t ins_capacity, ins_count; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -4295,6 +4298,136 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; }
+static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list, unsigned int loop_idx) +{ + const struct vkd3d_shader_location no_loc = {0}; + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + { + struct vsir_block *block = structure->u.block; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + (block->end - block->begin))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memcpy(&cfg->instructions[cfg->ins_count], block->begin, (char *)block->end - (char *)block->begin); + + cfg->ins_count += block->end - block->begin; + break; + } + + case STRUCTURE_TYPE_LOOP: + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_LOOP); + + if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) + return ret; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); + break; + + case STRUCTURE_TYPE_JUMP: + { + enum vkd3d_shader_opcode opcode; + + if (structure->u.jump.target != loop_idx) + { + WARN("Multilevel jumps are not supported yet, falling back to the simple structurizer.\n"); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + switch (structure->u.jump.type) + { + case JUMP_BREAK: + opcode = structure->u.jump.condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; + break; + + case JUMP_CONTINUE: + opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; + break; + + case JUMP_RET: + assert(!structure->u.jump.condition); + opcode = VKD3DSIH_RET; + break; + + default: + vkd3d_unreachable(); + } + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, opcode, 0, !!structure->u.jump.condition)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (structure->u.jump.invert_condition) + cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + if (structure->u.jump.condition) + cfg->instructions[cfg->ins_count].src[0] = *structure->u.jump.condition; + + ++cfg->ins_count; + break; + } + + default: + vkd3d_unreachable(); + } + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + size_t i; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + /* Copy declarations until the first block. 
*/ + for (i = 0; i < cfg->program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &cfg->program->instructions.elements[i]; + + if (ins->handler_idx == VKD3DSIH_LABEL) + break; + + cfg->instructions[cfg->ins_count++] = *ins; + } + + if ((ret = vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX)) < 0) + goto fail; + + vkd3d_free(cfg->program->instructions.elements); + cfg->program->instructions.elements = cfg->instructions; + cfg->program->instructions.capacity = cfg->ins_capacity; + cfg->program->instructions.count = cfg->ins_count; + + return VKD3D_OK; + +fail: + vkd3d_free(cfg->instructions); + + return ret; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -4346,10 +4479,21 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, return result; }
- if ((result = vsir_program_structurise(program)) < 0) + if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) { - vsir_cfg_cleanup(&cfg); - return result; + if (result == VKD3D_ERROR_NOT_IMPLEMENTED) + { + if ((result = vsir_program_structurise(program)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + } + else + { + vsir_cfg_cleanup(&cfg); + return result; + } }
vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani <gmascellani@codeweavers.com>
The new structurizer therefore reaches feature parity with the older simple one, except for a couple of points:
* the old structurizer accepts any CFG, without requiring reducibility; however, the DXIL specification requires the CFG to be reducible anyway, so we're not really losing anything;
* the new structurizer additionally requires that no block has two incoming back arrows; AFAIK this is a condition that can happen, but in practice it seems to be rare; also, it would not be hard to add support for it once it is deemed useful.
On the other hand, the new structurizer makes use of the merging information that is reconstructed from the CFG, which is important for downstream optimization and fundamental for correctly emitting tangled instructions.
Taking these considerations into account, the old structurizer is considered superseded and is therefore removed.
---
 libs/vkd3d-shader/ir.c | 256 +++++++++++++++++------------------------
 1 file changed, 105 insertions(+), 151 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 9548fb5b0..0e6eec57c 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -467,12 +467,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned src->reg.idx[0].offset = idx; }
+static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + src->reg.idx[0].offset = idx; +} + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); dst->reg.idx[0].offset = idx; }
+static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + dst->reg.idx[0].offset = idx; +} + static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); @@ -2913,132 +2925,6 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; }
-static enum vkd3d_result vsir_program_structurise(struct vsir_program *program) -{ - const unsigned int block_temp_idx = program->temp_count; - struct vkd3d_shader_instruction *instructions = NULL; - const struct vkd3d_shader_location no_loc = {0}; - size_t ins_capacity = 0, ins_count = 0, i; - bool first_label_found = false; - - if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) - goto fail; - - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - - switch (ins->handler_idx) - { - case VKD3DSIH_PHI: - case VKD3DSIH_SWITCH_MONOLITHIC: - vkd3d_unreachable(); - - case VKD3DSIH_LABEL: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4)) - goto fail; - - if (!first_label_found) - { - first_label_found = true; - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) - goto fail; - src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx); - ins_count++; - } - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) - goto fail; - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - break; - - case VKD3DSIH_BRANCH: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2)) - goto fail; - - if (vsir_register_is_label(&ins->src[0].reg)) - { - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - } - else - { - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - instructions[ins_count].src[0] = ins->src[0]; - src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1])); - src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2])); - ins_count++; - } - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) - goto fail; - ins_count++; - break; - - case VKD3DSIH_RET: - default: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) - goto fail; - - instructions[ins_count++] = *ins; - break; - } - } - - assert(first_label_found); - - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3)) - goto fail; - - if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 
0, 0)) - goto fail; - ins_count++; - - vkd3d_free(program->instructions.elements); - program->instructions.elements = instructions; - program->instructions.capacity = ins_capacity; - program->instructions.count = ins_count; - program->temp_count += 1; - - return VKD3D_OK; - -fail: - vkd3d_free(instructions); - return VKD3D_ERROR_OUT_OF_MEMORY; -} - struct vsir_block_list { struct vsir_block **blocks; @@ -3286,6 +3172,8 @@ struct vsir_cfg
struct vkd3d_shader_instruction *instructions; size_t ins_capacity, ins_count; + unsigned int jump_target_temp_idx; + unsigned int temp_count; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -4325,6 +4213,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, }
case STRUCTURE_TYPE_LOOP: + { if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) return VKD3D_ERROR_OUT_OF_MEMORY;
@@ -4333,32 +4222,92 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) return ret;
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 5)) return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); + + /* Add a trampoline to implement multilevel jumping depending on the stored + * jump_target value. */ + if (loop_idx != UINT_MAX) + { + /* If the multilevel jump is a `continue' and the target is the loop we're inside + * right now, then we can finally do the `continue'. */ + const unsigned int outer_continue_target = loop_idx << 1 | 1; + /* If the multilevel jump is a `continue' to any other target, or if it is a `break' + * and the target is not the loop we just finished emitting, then it means that + * we have to reach an outer loop, so we keep breaking. */ + const unsigned int inner_break_target = structure->u.loop.idx << 1; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); + src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], outer_continue_target); + + ++cfg->ins_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); + + ++cfg->ins_count; + ++cfg->temp_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); + src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], inner_break_target); + + ++cfg->ins_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_BREAKP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); + + ++cfg->ins_count; + ++cfg->temp_count; + } + break; + }
case STRUCTURE_TYPE_JUMP: { + /* Encode the jump target as the loop index plus a bit to remember whether + * we're breaking or continueing. */ + unsigned int jump_target = structure->u.jump.target << 1; enum vkd3d_shader_opcode opcode;
- if (structure->u.jump.target != loop_idx) - { - WARN("Multilevel jumps are not supported yet, falling back to the simple structurizer.\n"); - return VKD3D_ERROR_NOT_IMPLEMENTED; - } - switch (structure->u.jump.type) { + case JUMP_CONTINUE: + /* If we're continueing the loop we're directly inside, then we can emit a + * `continue'. Otherwise we first have to break all the loops between here + * and the loop to continue, recording our intention to continue + * in the lowest bit of jump_target. */ + if (structure->u.jump.target == loop_idx) + { + opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; + break; + } + jump_target |= 1; + /* fall through */ + case JUMP_BREAK: opcode = structure->u.jump.condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; break;
- case JUMP_CONTINUE: - opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; - break; - case JUMP_RET: assert(!structure->u.jump.condition); opcode = VKD3DSIH_RET; @@ -4368,9 +4317,21 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, vkd3d_unreachable(); }
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2)) return VKD3D_ERROR_OUT_OF_MEMORY;
+ if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) + { + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_MOV, 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target); + + ++cfg->ins_count; + } + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], &no_loc, opcode, 0, !!structure->u.jump.condition)) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -4398,6 +4359,9 @@ static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) enum vkd3d_result ret; size_t i;
+ cfg->jump_target_temp_idx = cfg->program->temp_count; + cfg->temp_count = cfg->program->temp_count + 1; + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) return VKD3D_ERROR_OUT_OF_MEMORY;
@@ -4419,6 +4383,7 @@ static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) cfg->program->instructions.elements = cfg->instructions; cfg->program->instructions.capacity = cfg->ins_capacity; cfg->program->instructions.count = cfg->ins_count; + cfg->program->temp_count = cfg->temp_count;
return VKD3D_OK;
@@ -4481,19 +4446,8 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) { - if (result == VKD3D_ERROR_NOT_IMPLEMENTED) - { - if ((result = vsir_program_structurise(program)) < 0) - { - vsir_cfg_cleanup(&cfg); - return result; - } - } - else - { - vsir_cfg_cleanup(&cfg); - return result; - } + vsir_cfg_cleanup(&cfg); + return result; }
vsir_cfg_cleanup(&cfg);
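As an aside for reviewers, the jump-target encoding used by the trampolines above can be summarized with the following hypothetical C helpers. The helper names are invented for illustration only; the actual pass emits IEQ/CONTINUEP/BREAKP instructions operating on the jump_target temporary register rather than calling any functions.

/* Hypothetical helpers mirroring the encoding used by the trampolines. */
#include <assert.h>
#include <stdbool.h>

/* A jump target is the index of the targeted loop shifted left by one, with
 * the low bit set when the jump must eventually become a `continue'. */
static unsigned int encode_jump_target(unsigned int loop_idx, bool is_continue)
{
    return loop_idx << 1 | (is_continue ? 1u : 0u);
}

/* Checks performed after the ENDLOOP of loop `inner_idx', which is directly
 * nested inside loop `outer_idx'. */
static bool should_continue_outer(unsigned int jump_target, unsigned int outer_idx)
{
    return jump_target == encode_jump_target(outer_idx, true);
}

static bool should_keep_breaking(unsigned int jump_target, unsigned int inner_idx)
{
    return jump_target != encode_jump_target(inner_idx, false);
}

int main(void)
{
    unsigned int target;

    /* A `break' targeting loop 0, emitted inside loop 2 (nested in loop 1,
     * itself nested in loop 0): the BREAK leaves loop 2, the trampolines
     * after the ENDLOOPs of loops 2 and 1 keep breaking outwards, and the
     * trampoline after the ENDLOOP of loop 0 stops the propagation. */
    target = encode_jump_target(0, false);
    assert(!should_continue_outer(target, 1));
    assert(should_keep_breaking(target, 2));
    assert(should_keep_breaking(target, 1));
    assert(!should_keep_breaking(target, 0));

    /* A `continue' targeting loop 1, emitted inside loop 2: it is emitted as
     * a BREAK out of loop 2, and the trampoline after loop 2's ENDLOOP turns
     * it back into a CONTINUE of loop 1. */
    target = encode_jump_target(1, true);
    assert(should_continue_outer(target, 1));

    return 0;
}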
From: Giovanni Mascellani <gmascellani@codeweavers.com>
---
 libs/vkd3d-shader/ir.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c
index 0e6eec57c..23e8d3de8 100644
--- a/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d-shader/ir.c
@@ -4123,11 +4123,29 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg)
                  * next block, in which case we make sure it's the
                  * false branch. */
                 if (action_true.jump_type == JUMP_NONE)
+                {
+                    invert_condition = true;
+                }
+                else if (stack_depth >= 2)
+                {
+                    struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2];
+                    struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1];
+
+                    assert(inner_loop->type == STRUCTURE_TYPE_LOOP);
+
+                    /* Otherwise, if one of the branches is
+                     * continueing the inner loop we're inside,
+                     * make sure it's the false branch (because it
+                     * will be optimized out later). */
+                    if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx)
+                        invert_condition = true;
+                }
+
+                if (invert_condition)
                 {
                     struct vsir_cfg_edge_action tmp = action_true;
                     action_true = action_false;
                     action_false = tmp;
-                    invert_condition = true;
                 }
 
                 assert(action_true.jump_type != JUMP_NONE);
From: Giovanni Mascellani <gmascellani@codeweavers.com>
---
 libs/vkd3d-shader/ir.c | 54 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c
index 23e8d3de8..0ecda5927 100644
--- a/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d-shader/ir.c
@@ -4204,6 +4204,54 @@ fail:
     return VKD3D_ERROR_OUT_OF_MEMORY;
 }
 
+static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *list, unsigned int target)
+{
+    struct vsir_cfg_structure *last = &list->structures[list->count - 1];
+
+    if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE
+            && !last->u.jump.condition && last->u.jump.target == target)
+        --list->count;
+}
+
+static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list)
+{
+    enum vkd3d_result ret;
+    size_t i;
+
+    for (i = 0; i < list->count; ++i)
+    {
+        struct vsir_cfg_structure *loop = &list->structures[i];
+        struct vsir_cfg_structure_list *loop_body;
+
+        if (loop->type != STRUCTURE_TYPE_LOOP)
+            continue;
+
+        loop_body = &loop->u.loop.body;
+
+        if (loop_body->count == 0)
+            continue;
+
+        vsir_cfg_remove_trailing_continue(loop_body, loop->u.loop.idx);
+
+        if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0)
+            return ret;
+    }
+
+    return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg)
+{
+    enum vkd3d_result ret;
+
+    ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program);
+
+    if (TRACE_ON())
+        vsir_cfg_dump_structured_program(cfg);
+
+    return ret;
+}
+
 static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg,
         struct vsir_cfg_structure_list *list, unsigned int loop_idx)
 {
@@ -4462,6 +4510,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
         return result;
     }
 
+    if ((result = vsir_cfg_optimize(&cfg)) < 0)
+    {
+        vsir_cfg_cleanup(&cfg);
+        return result;
+    }
+
     if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0)
     {
         vsir_cfg_cleanup(&cfg);
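For context, the transformation implemented in this patch is the structured-program counterpart of the following C observation (illustrative only): a `continue' that is the last statement of a loop body has no effect and can simply be dropped.

/* Illustrative only: in C terms, a `continue' that is the last statement of
 * a loop body is a no-op, which is why the pass can drop such trailing
 * jumps from the structured program. */
#include <stdio.h>

int main(void)
{
    int i, sum_a = 0, sum_b = 0;

    for (i = 0; i < 4; ++i)
    {
        sum_a += i;
        continue;   /* trailing `continue': the loop would continue anyway */
    }

    for (i = 0; i < 4; ++i)
        sum_b += i; /* the same loop with the trailing `continue' removed */

    printf("%d %d\n", sum_a, sum_b); /* prints "6 6" */
    return 0;
}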
From: Giovanni Mascellani <gmascellani@codeweavers.com>
---
 libs/vkd3d-shader/ir.c | 142 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 0ecda5927..629aae888 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3050,6 +3050,8 @@ struct vsir_cfg_structure STRUCTURE_TYPE_BLOCK, /* Execute a loop, which is identified by an index. */ STRUCTURE_TYPE_LOOP, + /* Execute a selection construct. */ + STRUCTURE_TYPE_SELECTION, /* Execute a `return' or a (possibly) multilevel `break' or * `continue', targeting a loop by its index. If `condition' * is non-NULL, then the jump is conditional (this is @@ -3065,6 +3067,13 @@ struct vsir_cfg_structure unsigned idx; } loop; struct + { + struct vkd3d_shader_src_param *condition; + struct vsir_cfg_structure_list if_body; + struct vsir_cfg_structure_list else_body; + bool invert_condition; + } selection; + struct { enum vsir_cfg_jump_type { @@ -3110,6 +3119,20 @@ static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg return ret; }
+static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *begin, size_t size) +{ + if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + size, + sizeof(*list->structures))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); + + list->count += size; + + return VKD3D_OK; +} + static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) { memset(structure, 0, sizeof(*structure)); @@ -3118,8 +3141,20 @@ static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum v
static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) { - if (structure->type == STRUCTURE_TYPE_LOOP) - vsir_cfg_structure_list_cleanup(&structure->u.loop.body); + switch (structure->type) + { + case STRUCTURE_TYPE_LOOP: + vsir_cfg_structure_list_cleanup(&structure->u.loop.body); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_structure_list_cleanup(&structure->u.selection.if_body); + vsir_cfg_structure_list_cleanup(&structure->u.selection.else_body); + break; + + default: + break; + } }
struct vsir_cfg @@ -3295,6 +3330,25 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); break;
+ case STRUCTURE_TYPE_SELECTION: + TRACE("%sif {\n", cfg->debug_buffer.buffer); + + vsir_cfg_structure_list_dump(cfg, &structure->u.selection.if_body); + + if (structure->u.selection.else_body.count == 0) + { + TRACE("%s}\n", cfg->debug_buffer.buffer); + } + else + { + TRACE("%s} else {\n", cfg->debug_buffer.buffer); + + vsir_cfg_structure_list_dump(cfg, &structure->u.selection.else_body); + + TRACE("%s}\n", cfg->debug_buffer.buffer); + } + break; + case STRUCTURE_TYPE_JUMP: { const char *type_str; @@ -4213,6 +4267,52 @@ static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *li --list->count; }
+static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structure_list *list) +{ + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; + + if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) + continue; + + vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); + new_selection.u.selection.condition = structure->u.jump.condition; + new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; + + if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, + STRUCTURE_TYPE_JUMP))) + return VKD3D_ERROR_OUT_OF_MEMORY; + new_jump->u.jump.type = structure->u.jump.type; + new_jump->u.jump.target = structure->u.jump.target; + + /* Move the rest of the structure list in the else branch + * rather than leaving it after the selection construct. The + * reason is that this is more conducive to further + * optimization, because all the conditional `break's appear + * as the last instruction of a branch of a cascade of + * selection constructs at the end of the structure list we're + * processing, instead of being buried in the middle of the + * structure list itself. */ + if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, + &list->structures[i + 1], list->count - i - 1)) < 0) + return ret; + + *structure = new_selection; + list->count = i + 1; + + if ((ret = vsir_cfg_synthesize_selections(&structure->u.selection.else_body)) < 0) + return ret; + + break; + } + + return VKD3D_OK; +} + static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) { enum vkd3d_result ret; @@ -4235,6 +4335,9 @@ static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct
if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) return ret; + + if ((ret = vsir_cfg_synthesize_selections(loop_body)) < 0) + return ret; }
return VKD3D_OK; @@ -4348,6 +4451,41 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, break; }
+ case STRUCTURE_TYPE_SELECTION: + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], &no_loc, + VKD3DSIH_IF, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + cfg->instructions[cfg->ins_count].src[0] = *structure->u.selection.condition; + + if (structure->u.selection.invert_condition) + cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + ++cfg->ins_count; + + if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.if_body, loop_idx)) < 0) + return ret; + + if (structure->u.selection.else_body.count != 0) + { + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ELSE); + + if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.else_body, loop_idx)) < 0) + return ret; + } + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDIF); + break; + case STRUCTURE_TYPE_JUMP: { /* Encode the jump target as the loop index plus a bit to remember whether
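To illustrate what vsir_cfg_synthesize_selections() does to a structure list, here is a toy, hypothetical C sketch of the same rewrite on a flat array. The types and names are invented for illustration; the real pass works on struct vsir_cfg_structure nodes, carries the jump's condition over to the selection, and recurses into the newly created else body.

/* Toy illustration (hypothetical types and names): the tail of the list that
 * follows a conditional jump is moved into the else branch, so that the jump
 * ends up as the sole statement of the if branch. */
#include <stdio.h>
#include <string.h>

enum node_type {NODE_BLOCK, NODE_COND_JUMP, NODE_SELECTION};

struct node
{
    enum node_type type;
    const char *label;
    struct node *else_body;
    size_t else_count;
};

static void synthesize_selection(struct node *list, size_t *count, struct node *tail_storage)
{
    size_t i;

    for (i = 0; i < *count; ++i)
    {
        size_t tail_count;

        if (list[i].type != NODE_COND_JUMP)
            continue;

        /* Move everything after the conditional jump into the else branch. */
        tail_count = *count - i - 1;
        memcpy(tail_storage, &list[i + 1], tail_count * sizeof(*list));

        list[i].type = NODE_SELECTION;
        list[i].else_body = tail_storage;
        list[i].else_count = tail_count;
        *count = i + 1;
        return;
    }
}

int main(void)
{
    struct node tail[4];
    struct node list[] =
    {
        {NODE_BLOCK, "block A"},
        {NODE_COND_JUMP, "break if c"},
        {NODE_BLOCK, "block B"},
        {NODE_BLOCK, "block C"},
    };
    size_t count = 4, i;

    synthesize_selection(list, &count, tail);

    /* Prints: block A / if (c) {break} else {block B; block C} */
    printf("%s\n", list[0].label);
    printf("if (c) {break} else {");
    for (i = 0; i < list[1].else_count; ++i)
        printf("%s%s", i ? "; " : "", list[1].else_body[i].label);
    printf("}\n");
    return 0;
}

The point of moving the tail into the else branch, as the comment in the patch explains, is that later passes then see conditional `break's as the last statement of a selection branch, where they are easier to optimize away.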
This merge request was approved by Giovanni Mascellani.