~~This applies on top of !711, the last three commits belong here.~~
This MR and the following ones will introduce a number of optimization passes on the structured representation of the shader, with the goal of fixing the idiosyncrasies of the code generated by the new structurizer. The general pattern is that we want to recognize when a combination of loops and jumps can instead be written with selection constructs. Ideally that should lead to removing all the synthesized loop intervals, but that cannot be guaranteed in general. We still want to remove all the loops we can, first to make the generated code easier to read and to recompile, and second because having fewer loops also means that more multilevel jumps become ordinary single-level jumps, which do not require overhead to be represented in SPIR-V.
-- v5: vkd3d-shader/ir: Synthesize selection constructs from conditional jumps. vkd3d-shader/ir: Remove trailing `continue's. vkd3d-shader/ir: Move `continue's to the false branch when possible.
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 15fe6ca93..55d3446f9 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -4123,11 +4123,29 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) * next block, in which case we make sure it's the * false branch. */ if (action_true.jump_type == JUMP_NONE) + { + invert_condition = true; + } + else if (stack_depth >= 2) + { + struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; + struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; + + assert(inner_loop->type == STRUCTURE_TYPE_LOOP); + + /* Otherwise, if one of the branches is + * continueing the inner loop we're inside, + * make sure it's the false branch (because it + * will be optimized out later). */ + if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx) + invert_condition = true; + } + + if (invert_condition) { struct vsir_cfg_edge_action tmp = action_true; action_true = action_false; action_false = tmp; - invert_condition = true; }
assert(action_true.jump_type != JUMP_NONE);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 55d3446f9..82e53c82b 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -4204,6 +4204,54 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; }
+static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *list, unsigned int target) +{ + struct vsir_cfg_structure *last = &list->structures[list->count - 1]; + + if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE + && !last->u.jump.condition && last->u.jump.target == target) + --list->count; +} + +static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) +{ + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *loop = &list->structures[i]; + struct vsir_cfg_structure_list *loop_body; + + if (loop->type != STRUCTURE_TYPE_LOOP) + continue; + + loop_body = &loop->u.loop.body; + + if (loop_body->count == 0) + continue; + + vsir_cfg_remove_trailing_continue(loop_body, loop->u.loop.idx); + + if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) + return ret; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + + ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); + + if (TRACE_ON()) + vsir_cfg_dump_structured_program(cfg); + + return ret; +} + static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, unsigned int loop_idx) { @@ -5325,6 +5373,12 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t return result; }
+ if ((result = vsir_cfg_optimize(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) { vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 142 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 82e53c82b..5287774da 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3050,6 +3050,8 @@ struct vsir_cfg_structure STRUCTURE_TYPE_BLOCK, /* Execute a loop, which is identified by an index. */ STRUCTURE_TYPE_LOOP, + /* Execute a selection construct. */ + STRUCTURE_TYPE_SELECTION, /* Execute a `return' or a (possibly) multilevel `break' or * `continue', targeting a loop by its index. If `condition' * is non-NULL, then the jump is conditional (this is @@ -3065,6 +3067,13 @@ struct vsir_cfg_structure unsigned idx; } loop; struct + { + struct vkd3d_shader_src_param *condition; + struct vsir_cfg_structure_list if_body; + struct vsir_cfg_structure_list else_body; + bool invert_condition; + } selection; + struct { enum vsir_cfg_jump_type { @@ -3110,6 +3119,20 @@ static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg return ret; }
+static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *begin, size_t size) +{ + if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + size, + sizeof(*list->structures))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); + + list->count += size; + + return VKD3D_OK; +} + static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) { memset(structure, 0, sizeof(*structure)); @@ -3118,8 +3141,20 @@ static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum v
static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) { - if (structure->type == STRUCTURE_TYPE_LOOP) - vsir_cfg_structure_list_cleanup(&structure->u.loop.body); + switch (structure->type) + { + case STRUCTURE_TYPE_LOOP: + vsir_cfg_structure_list_cleanup(&structure->u.loop.body); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_structure_list_cleanup(&structure->u.selection.if_body); + vsir_cfg_structure_list_cleanup(&structure->u.selection.else_body); + break; + + default: + break; + } }
struct vsir_cfg @@ -3295,6 +3330,25 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); break;
+ case STRUCTURE_TYPE_SELECTION: + TRACE("%sif {\n", cfg->debug_buffer.buffer); + + vsir_cfg_structure_list_dump(cfg, &structure->u.selection.if_body); + + if (structure->u.selection.else_body.count == 0) + { + TRACE("%s}\n", cfg->debug_buffer.buffer); + } + else + { + TRACE("%s} else {\n", cfg->debug_buffer.buffer); + + vsir_cfg_structure_list_dump(cfg, &structure->u.selection.else_body); + + TRACE("%s}\n", cfg->debug_buffer.buffer); + } + break; + case STRUCTURE_TYPE_JUMP: { const char *type_str; @@ -4213,6 +4267,52 @@ static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *li --list->count; }
+static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structure_list *list) +{ + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; + + if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) + continue; + + vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); + new_selection.u.selection.condition = structure->u.jump.condition; + new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; + + if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, + STRUCTURE_TYPE_JUMP))) + return VKD3D_ERROR_OUT_OF_MEMORY; + new_jump->u.jump.type = structure->u.jump.type; + new_jump->u.jump.target = structure->u.jump.target; + + /* Move the rest of the structure list in the else branch + * rather than leaving it after the selection construct. The + * reason is that this is more conducive to further + * optimization, because all the conditional `break's appear + * as the last instruction of a branch of a cascade of + * selection constructs at the end of the structure list we're + * processing, instead of being buried in the middle of the + * structure list itself. */ + if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, + &list->structures[i + 1], list->count - i - 1)) < 0) + return ret; + + *structure = new_selection; + list->count = i + 1; + + if ((ret = vsir_cfg_synthesize_selections(&structure->u.selection.else_body)) < 0) + return ret; + + break; + } + + return VKD3D_OK; +} + static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) { enum vkd3d_result ret; @@ -4235,6 +4335,9 @@ static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct
if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) return ret; + + if ((ret = vsir_cfg_synthesize_selections(loop_body)) < 0) + return ret; }
return VKD3D_OK; @@ -4348,6 +4451,41 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, break; }
+ case STRUCTURE_TYPE_SELECTION: + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], &no_loc, + VKD3DSIH_IF, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + cfg->instructions[cfg->ins_count].src[0] = *structure->u.selection.condition; + + if (structure->u.selection.invert_condition) + cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + ++cfg->ins_count; + + if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.if_body, loop_idx)) < 0) + return ret; + + if (structure->u.selection.else_body.count != 0) + { + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ELSE); + + if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.else_body, loop_idx)) < 0) + return ret; + } + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDIF); + break; + case STRUCTURE_TYPE_JUMP: { /* Encode the jump target as the loop index plus a bit to remember whether