~~This goes atop !773. The last three commits belong to this MR.~~
Trampolines and launchers let us handle code that performs multilevel jumps, but they partially virtualize the control flow, which can make the job of downstream compilers harder. We therefore avoid emitting them whenever they are not needed.
-- v2: vkd3d-shader/ir: Only emit launchers when needed. vkd3d-shader/ir: Only emit trampolines when needed. vkd3d-shader/ir: Swap selection branches if the if branch is empty.
From: Giovanni Mascellani <gmascellani@codeweavers.com>
--- libs/vkd3d-shader/ir.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 610d907d9..b0b3499c2 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -4268,6 +4268,17 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg --cfg->loop_intervals[else_target].target_count; }
+ /* If a branch becomes empty, make it the else branch, so we save a block. */ + if (selection->u.selection.if_body.count == 0) + { + struct vsir_cfg_structure_list tmp; + + selection->u.selection.invert_condition = !selection->u.selection.invert_condition; + tmp = selection->u.selection.if_body; + selection->u.selection.if_body = selection->u.selection.else_body; + selection->u.selection.else_body = tmp; + } + return VKD3D_OK; }
From: Giovanni Mascellani <gmascellani@codeweavers.com>
--- libs/vkd3d-shader/ir.c | 54 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index b0b3499c2..77ad28236 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2986,6 +2986,8 @@ struct vsir_cfg_structure { struct vsir_cfg_structure_list body; unsigned idx; + bool needs_trampoline; + struct vsir_cfg_structure *outer_loop; } loop; struct vsir_cfg_structure_selection { @@ -3257,7 +3259,8 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct
vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body);
- TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); + TRACE("%s} # %u%s\n", cfg->debug_buffer.buffer, structure->u.loop.idx, + structure->u.loop.needs_trampoline ? ", tramp" : ""); break;
case STRUCTURE_TYPE_SELECTION: @@ -4503,6 +4506,51 @@ static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structu } }
+/* Trampolines are code gadgets used to emulate multilevel jumps (which are not natively supported + * by SPIR-V). A trampoline is inserted just after a loop and checks whether control has reached the + * intended site (i.e., we just jumped out of the target block) or if other levels of jumping are + * needed. For each jump a trampoline is required for all the loops between the jump itself and the + * target loop, excluding the target loop itself. */ +static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *loop) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + break; + + case STRUCTURE_TYPE_LOOP: + structure->u.loop.outer_loop = loop; + vsir_cfg_mark_trampolines(cfg, &structure->u.loop.body, structure); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_mark_trampolines(cfg, &structure->u.selection.if_body, loop); + vsir_cfg_mark_trampolines(cfg, &structure->u.selection.else_body, loop); + break; + + case STRUCTURE_TYPE_JUMP: + { + struct vsir_cfg_structure *l; + if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) + break; + for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) + { + assert(l->type == STRUCTURE_TYPE_LOOP); + l->u.loop.needs_trampoline = true; + } + break; + } + } + } +} + static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -4511,6 +4559,8 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg)
ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program);
+ vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL); + if (TRACE_ON()) vsir_cfg_dump_structured_program(cfg);
@@ -4559,7 +4609,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg,
/* Add a trampoline to implement multilevel jumping depending on the stored * jump_target value. */ - if (loop_idx != UINT_MAX) + if (loop->needs_trampoline) { /* If the multilevel jump is a `continue' and the target is the loop we're inside * right now, then we can finally do the `continue'. */
From: Giovanni Mascellani <gmascellani@codeweavers.com>
--- libs/vkd3d-shader/ir.c | 53 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 77ad28236..97dc17de5 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3010,6 +3010,7 @@ struct vsir_cfg_structure unsigned int target; struct vkd3d_shader_src_param *condition; bool invert_condition; + bool needs_launcher; } jump; } u; }; @@ -3304,8 +3305,9 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct vkd3d_unreachable(); }
- TRACE("%s%s%s %u\n", cfg->debug_buffer.buffer, type_str, - structure->u.jump.condition ? "c" : "", structure->u.jump.target); + TRACE("%s%s%s %u%s\n", cfg->debug_buffer.buffer, type_str, + structure->u.jump.condition ? "c" : "", structure->u.jump.target, + structure->u.jump.needs_launcher ? " # launch" : ""); break; }
@@ -4551,6 +4553,45 @@ static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_stru } }
+/* Launchers are the counterpart of trampolines. A launcher is inserted just before a jump, and + * writes in a well-known variable what is the target of the jump. Trampolines will then read that + * variable to decide how to redirect the jump to its intended target. A launcher is needed each + * time the innermost loop containing the jump itself has a trampoline (independently of whether the + * jump is targeting that loop or not). */ +static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *loop) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + break; + + case STRUCTURE_TYPE_LOOP: + vsir_cfg_mark_launchers(cfg, &structure->u.loop.body, structure); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_mark_launchers(cfg, &structure->u.selection.if_body, loop); + vsir_cfg_mark_launchers(cfg, &structure->u.selection.else_body, loop); + break; + + case STRUCTURE_TYPE_JUMP: + if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) + break; + assert(loop && loop->type == STRUCTURE_TYPE_LOOP); + if (loop->u.loop.needs_trampoline) + structure->u.jump.needs_launcher = true; + break; + } + } +} + static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -4559,7 +4600,13 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg)
ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program);
+ /* Trampolines and launchers cannot be marked with the same pass, + * because a jump might have to be marked as launcher even when it + * targets its innermost loop, if other jumps in the same loop + * need a trampoline anyway. So launchers can be discovered only + * once all the trampolines are known. */ vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL); + vsir_cfg_mark_launchers(cfg, &cfg->structured_program, NULL);
if (TRACE_ON()) vsir_cfg_dump_structured_program(cfg); @@ -4746,7 +4793,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) return VKD3D_ERROR_OUT_OF_MEMORY;
- if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) + if (jump->needs_launcher) { if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], &no_loc, VKD3DSIH_MOV, 1, 1))
This merge request was approved by Conor McCarthy.