~~This applies on top of !704, the last two commits belong here.~~
Here we finally emit the structured program, replacing the older simple structurizer. The advantage of the new structurizer is that the CFG if statically described in the generated program, without having to trace the value of a variable. Upcoming MRs will make the CFG even simpler, easier to read and hopefully to optimize for downstream compilers.
-- v4: vkd3d-shader/ir: Emit multilevel jumps in the structured program. vkd3d-shader/ir: Emit the reconstructed structured program.
From: Giovanni Mascellani gmascellani@codeweavers.com
Multilevel jumps are not supported yet, and trigger a fallback to the simple structurizer. --- libs/vkd3d-shader/ir.c | 150 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 55d121646..9548fb5b0 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3283,6 +3283,9 @@ struct vsir_cfg size_t loop_interval_count, loop_interval_capacity;
struct vsir_cfg_structure_list structured_program; + + struct vkd3d_shader_instruction *instructions; + size_t ins_capacity, ins_count; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -4295,6 +4298,136 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; }
+static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list, unsigned int loop_idx) +{ + const struct vkd3d_shader_location no_loc = {0}; + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + { + struct vsir_block *block = structure->u.block; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + (block->end - block->begin))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memcpy(&cfg->instructions[cfg->ins_count], block->begin, (char *)block->end - (char *)block->begin); + + cfg->ins_count += block->end - block->begin; + break; + } + + case STRUCTURE_TYPE_LOOP: + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_LOOP); + + if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) + return ret; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); + break; + + case STRUCTURE_TYPE_JUMP: + { + enum vkd3d_shader_opcode opcode; + + if (structure->u.jump.target != loop_idx) + { + WARN("Multilevel jumps are not supported yet, falling back to the simple structurizer.\n"); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + switch (structure->u.jump.type) + { + case JUMP_BREAK: + opcode = structure->u.jump.condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; + break; + + case JUMP_CONTINUE: + opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; + break; + + case JUMP_RET: + assert(!structure->u.jump.condition); + opcode = VKD3DSIH_RET; + break; + + default: + vkd3d_unreachable(); + } + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, opcode, 0, !!structure->u.jump.condition)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (structure->u.jump.invert_condition) + cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + if (structure->u.jump.condition) + cfg->instructions[cfg->ins_count].src[0] = *structure->u.jump.condition; + + ++cfg->ins_count; + break; + } + + default: + vkd3d_unreachable(); + } + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + size_t i; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + /* Copy declarations until the first block. */ + for (i = 0; i < cfg->program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &cfg->program->instructions.elements[i]; + + if (ins->handler_idx == VKD3DSIH_LABEL) + break; + + cfg->instructions[cfg->ins_count++] = *ins; + } + + if ((ret = vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX)) < 0) + goto fail; + + vkd3d_free(cfg->program->instructions.elements); + cfg->program->instructions.elements = cfg->instructions; + cfg->program->instructions.capacity = cfg->ins_capacity; + cfg->program->instructions.count = cfg->ins_count; + + return VKD3D_OK; + +fail: + vkd3d_free(cfg->instructions); + + return ret; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -4346,10 +4479,21 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, return result; }
- if ((result = vsir_program_structurise(program)) < 0) + if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) { - vsir_cfg_cleanup(&cfg); - return result; + if (result == VKD3D_ERROR_NOT_IMPLEMENTED) + { + if ((result = vsir_program_structurise(program)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + } + else + { + vsir_cfg_cleanup(&cfg); + return result; + } }
vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
The new structurizer therefore reaches feature parity with the older simple one, except for a couple of points:
* the old structurizer accepts any CFG, without requiring reducibilty; however, the DXIL specification requires the CFG to be reducible anyway, so we're not really losing anything;
* the new structurizer additionally requires that no block has two incoming back arrows; AFAIK this is condition that can happen, but in practice it seems to be rare; also, it's not hard to add support for it, as soon as it is decided it is useful.
Taking these considerations into account, the old structurizer is considered superseded and is therefore removed. --- libs/vkd3d-shader/ir.c | 263 ++++++++++++++++++----------------------- 1 file changed, 112 insertions(+), 151 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 9548fb5b0..e3709c61c 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -467,12 +467,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned src->reg.idx[0].offset = idx; }
+static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + src->reg.idx[0].offset = idx; +} + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); dst->reg.idx[0].offset = idx; }
+static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + dst->reg.idx[0].offset = idx; +} + static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); @@ -2913,132 +2925,6 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; }
-static enum vkd3d_result vsir_program_structurise(struct vsir_program *program) -{ - const unsigned int block_temp_idx = program->temp_count; - struct vkd3d_shader_instruction *instructions = NULL; - const struct vkd3d_shader_location no_loc = {0}; - size_t ins_capacity = 0, ins_count = 0, i; - bool first_label_found = false; - - if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) - goto fail; - - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - - switch (ins->handler_idx) - { - case VKD3DSIH_PHI: - case VKD3DSIH_SWITCH_MONOLITHIC: - vkd3d_unreachable(); - - case VKD3DSIH_LABEL: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4)) - goto fail; - - if (!first_label_found) - { - first_label_found = true; - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) - goto fail; - src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx); - ins_count++; - } - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) - goto fail; - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - break; - - case VKD3DSIH_BRANCH: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2)) - goto fail; - - if (vsir_register_is_label(&ins->src[0].reg)) - { - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - } - else - { - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - instructions[ins_count].src[0] = ins->src[0]; - src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1])); - src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2])); - ins_count++; - } - - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) - goto fail; - ins_count++; - break; - - case VKD3DSIH_RET: - default: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) - goto fail; - - instructions[ins_count++] = *ins; - break; - } - } - - assert(first_label_found); - - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3)) - goto fail; - - if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0)) - goto fail; - ins_count++; - - vkd3d_free(program->instructions.elements); - program->instructions.elements = instructions; - program->instructions.capacity = ins_capacity; - program->instructions.count = ins_count; - program->temp_count += 1; - - return VKD3D_OK; - -fail: - vkd3d_free(instructions); - return VKD3D_ERROR_OUT_OF_MEMORY; -} - struct vsir_block_list { struct vsir_block **blocks; @@ -3286,6 +3172,8 @@ struct vsir_cfg
struct vkd3d_shader_instruction *instructions; size_t ins_capacity, ins_count; + unsigned int jump_target_temp_idx; + unsigned int temp_count; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -4325,6 +4213,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, }
case STRUCTURE_TYPE_LOOP: + { if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) return VKD3D_ERROR_OUT_OF_MEMORY;
@@ -4333,32 +4222,99 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) return ret;
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 5)) return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); + + /* Add a trampoline to implement multilevel jumping + * depending on the stored jump_target value. */ + if (loop_idx != UINT_MAX) + { + /* If the multilevel jump is a `continue' and the + * target is the loop we're inside right now, then + * we can finally do the `continue'. */ + const unsigned int outer_continue_target = loop_idx << 1 | 1; + /* If the multilevel jump is a `continue' to any + * other target, or if it is a `break' and the + * target is not the loop we just finished + * emitting, then it means that we have to reach + * an outer loop, so we keep breaking. */ + const unsigned int inner_break_target = structure->u.loop.idx << 1; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); + src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], outer_continue_target); + + ++cfg->ins_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); + + ++cfg->ins_count; + ++cfg->temp_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); + src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], inner_break_target); + + ++cfg->ins_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_BREAKP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); + + ++cfg->ins_count; + ++cfg->temp_count; + } + break; + }
case STRUCTURE_TYPE_JUMP: { + /* Encode the jump target as the loop index plus a bit + * to remember whether we're breaking or + * continueing. */ + unsigned int jump_target = structure->u.jump.target << 1; enum vkd3d_shader_opcode opcode;
- if (structure->u.jump.target != loop_idx) - { - WARN("Multilevel jumps are not supported yet, falling back to the simple structurizer.\n"); - return VKD3D_ERROR_NOT_IMPLEMENTED; - } - switch (structure->u.jump.type) { + case JUMP_CONTINUE: + /* If we're continueing the loop we're + * directly inside, then we can emit a + * `continue'. Otherwise we first have to + * break all the loops between here and the + * loop to continue, recording our intention + * to continue in the lowest bit of + * jump_target. */ + if (structure->u.jump.target == loop_idx) + { + opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; + break; + } + jump_target |= 1; + /* fall through */ + case JUMP_BREAK: opcode = structure->u.jump.condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; break;
- case JUMP_CONTINUE: - opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; - break; - case JUMP_RET: assert(!structure->u.jump.condition); opcode = VKD3DSIH_RET; @@ -4368,9 +4324,21 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, vkd3d_unreachable(); }
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2)) return VKD3D_ERROR_OUT_OF_MEMORY;
+ if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) + { + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_MOV, 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target); + + ++cfg->ins_count; + } + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], &no_loc, opcode, 0, !!structure->u.jump.condition)) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -4398,6 +4366,9 @@ static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) enum vkd3d_result ret; size_t i;
+ cfg->jump_target_temp_idx = cfg->program->temp_count; + cfg->temp_count = cfg->program->temp_count + 1; + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) return VKD3D_ERROR_OUT_OF_MEMORY;
@@ -4419,6 +4390,7 @@ static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) cfg->program->instructions.elements = cfg->instructions; cfg->program->instructions.capacity = cfg->ins_capacity; cfg->program->instructions.count = cfg->ins_count; + cfg->program->temp_count = cfg->temp_count;
return VKD3D_OK;
@@ -4481,19 +4453,8 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) { - if (result == VKD3D_ERROR_NOT_IMPLEMENTED) - { - if ((result = vsir_program_structurise(program)) < 0) - { - vsir_cfg_cleanup(&cfg); - return result; - } - } - else - { - vsir_cfg_cleanup(&cfg); - return result; - } + vsir_cfg_cleanup(&cfg); + return result; }
vsir_cfg_cleanup(&cfg);
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
vkd3d_unreachable(); }
if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1))
if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2)) return VKD3D_ERROR_OUT_OF_MEMORY;
if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP)
{
if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count],
&no_loc, VKD3DSIH_MOV, 1, 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx);
src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target);
This is virtualisation, right? I wonder if there will be issues with older drivers, like we found with the TGSM MR. I don't the tests there will trigger this case.
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
if (structure->u.jump.target != loop_idx)
{
WARN("Multilevel jumps are not supported yet, falling back to the simple structurizer.\n");
return VKD3D_ERROR_NOT_IMPLEMENTED;
}
switch (structure->u.jump.type) {
case JUMP_CONTINUE:
/* If we're continueing the loop we're
* directly inside, then we can emit a
* `continue'. Otherwise we first have to
* break all the loops between here and the
* loop to continue, recording our intention
* to continue in the lowest bit of
* jump_target. */
A minor observation: some comments in here have very short line lengths.
On Fri Mar 15 05:44:54 2024 +0000, Conor McCarthy wrote:
This is virtualisation, right? I wonder if there will be issues with older drivers, like we found with the TGSM MR. I don't the tests there will trigger this case.
Yeah, unfortunately there is some virtualization here. But whether it exposes us to a variant of the problem we're having with TGSM, I'm not completely sure, and I lean towards thinking it does not. My understanding is that the real problem we're having with TGSM is not virtualization, but lack of merging information. The new structurizer has usable merging information, and AFAIU the SPIR-V specification only require dynamically uniform control flow for tangled instructions, not necessarily statically uniform control flow. So the virtualization introduced by the break trampoline might make the compiler miss some optimization opportunities, but if invocations are expected to (dynamically) converge at some instruction, then they should (while clearly they cannot be expected to converge with the old structurizer).
That nevertheless, the optimizations passes I'm going to submit soon should largely remove the need for multilevel jumps (because loops with breaks are converted to selections whenever possible), so eventually I'll also add code to skip emitting trampolines when they're not needed. [My work branch](https://gitlab.winehq.org/giomasce/vkd3d/-/commits/cfg4) already does a decent job (though skipping trampolines is not written yet). Eventually I'm also going to gather statistics on ShadowCI to see how often we're forced to emit trampolines and if there are other heuristics that can help getting rid of them. I guess in the general case they cannot be avoided completely, though: one can always come up with an arbitrarily complicated DXIL shader that cannot be represented with a structured program without virtualizing at least some control flow. Hopefully this kind of shader is not often generated by DXC.
For completeness, let me mention that in general the new structurizer has to make some arbitrary choice, because the problem of structurizing a DXIL shader is intrinsically ill defined. [Consider for example this CFG](http://magjac.com/graphviz-visual-editor/?dot=digraph%20%7B%0A%20%20%20%20n1...), which is reducible (trivially: it has not back edges): ```dot digraph { n1 -> n2 -> n4; n1 -> n3 -> n5; n2 -> n5 -> n6; n3 -> n4 -> n6; } ``` When constructing the block order the structurizer can arbitrarily decide the relative order of blocks 2 and 3 and independently of blocks 4 and 5 (well, the structurizer currently has a rule to prefer more recently added nodes, so in practice the two choices are not independent; but that's mostly a coincidence). Depending on that choice, either block 4 will be a merge for block 5 or the other way around, but there is nothing in the graph that suggest that one option is better than the other. So if, say, block 5 contains a barrier and we happen to make it the merge node for 4, then everything will work fine. If we happen to pick the other choice, the program will be broken. And this is not a bug with the structurizer, it simply happens that the structure information is not there and we can only guess in some cases. If we happen to stumble upon cases like this we have nothing to do other than finding heuristics and hope they're enough. I assume that's what Windows drivers do too.
On Fri Mar 15 05:44:55 2024 +0000, Conor McCarthy wrote:
A minor observation: some comments in here have very short line lengths.
Yeah, it's a byproduct of me just lazily delegating formatting comment to `M-q` on emacs. But it's true that it has become a bit ridiculous here, I'll fix them.