This applies on top of !662, the last four commits belong here.
Here we compute a topological order (plus some additional properties) of the shader blocks. Eventually the blocks will be emitted precisely in this order, after having been enriched with structured control flow information.
-- v2: vkd3d-shader/ir: Keep loops contiguous in the topological sort. vkd3d-shader/ir: Dump the topological order of the control flow graph. vkd3d-shader/ir: Topologically sort the control flow graph. vkd3d-shader/ir: Allow adding to a block list without checking for duplicates. vkd3d-shader/ir: Sort each loop by block label. vkd3d-shader/ir: Dump the loops in the control flow graph. vkd3d-shader/ir: Keep track of loops by header block. vkd3d-shader/ir: Keep a reference to the parser inside struct vsir_cfg. vkd3d-shader/ir: Compute the loops in the control flow graph.
From: Giovanni Mascellani gmascellani@codeweavers.com
They have to be considered code rather than declarations, as required for instance by the SPIR-V backend. --- libs/vkd3d-shader/ir.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index f0bd85338..a379ba6dd 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2041,12 +2041,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte flattener->location = instruction->location;
/* Declarations should occur before the first code block, which in hull shaders is marked by the first - * phase instruction, and in all other shader types begins with the first label instruction. */ - if (!after_declarations_section && !vsir_instruction_is_dcl(instruction) - && instruction->handler_idx != VKD3DSIH_NOP) + * phase instruction, and in all other shader types begins with the first label instruction. + * Declaring an indexable temp with function scope is not considered a declaration, + * because it needs to live inside a function. */ + if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) { - after_declarations_section = true; - cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); + bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP + && instruction->declaration.indexable_temp.has_function_scope; + + if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) + { + after_declarations_section = true; + cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); + } }
cf_info = flattener->control_flow_depth
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index a379ba6dd..2e05d19ca 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3102,6 +3102,7 @@ struct vsir_cfg struct vsir_block *blocks; struct vsir_block *entry; size_t block_count; + struct vkd3d_string_buffer debug_buffer; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3112,6 +3113,9 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) vsir_block_cleanup(&cfg->blocks[i]);
vkd3d_free(cfg->blocks); + + if (TRACE_ON()) + vkd3d_string_buffer_cleanup(&cfg->debug_buffer); }
static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, @@ -3182,6 +3186,9 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) return VKD3D_ERROR_OUT_OF_MEMORY;
+ if (TRACE_ON()) + vkd3d_string_buffer_init(&cfg->debug_buffer); + for (i = 0; i < program->instructions.count; ++i) { struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; @@ -3292,12 +3299,8 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru
static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) { - struct vkd3d_string_buffer buf; size_t i, j;
- if (TRACE_ON()) - vkd3d_string_buffer_init(&buf); - for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; @@ -3309,7 +3312,7 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg)
if (TRACE_ON()) { - vkd3d_string_buffer_printf(&buf, "Block %u dominates:", block->label); + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates:", block->label); for (j = 0; j < cfg->block_count; j++) { struct vsir_block *block2 = &cfg->blocks[j]; @@ -3318,15 +3321,12 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) continue;
if (bitmap_is_set(block->dominates, j)) - vkd3d_string_buffer_printf(&buf, " %u", block2->label); + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); } - TRACE("%s\n", buf.buffer); - vkd3d_string_buffer_clear(&buf); + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); } } - - if (TRACE_ON()) - vkd3d_string_buffer_cleanup(&buf); }
enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 2e05d19ca..4ffdd0cb1 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3118,6 +3118,11 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); }
+static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) +{ + return bitmap_is_set(b1->dominates, b2->label - 1); +} + static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, struct vkd3d_shader_src_param *successor_param) { @@ -3320,7 +3325,7 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) if (block2->label == 0) continue;
- if (bitmap_is_set(block->dominates, j)) + if (vsir_block_dominates(block, block2)) vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); } TRACE("%s\n", cfg->debug_buffer.buffer);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- include/vkd3d_types.h | 2 + libs/vkd3d-shader/ir.c | 93 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/include/vkd3d_types.h b/include/vkd3d_types.h index 4a7aca236..9060fa087 100644 --- a/include/vkd3d_types.h +++ b/include/vkd3d_types.h @@ -41,6 +41,8 @@ enum vkd3d_result { /** Success. */ VKD3D_OK = 0, + /** Success as a result of there being nothing to do. */ + VKD3D_FALSE = 1, /** An unspecified failure occurred. */ VKD3D_ERROR = -1, /** There are not enough resources available to complete the operation. */ diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 4ffdd0cb1..05997241f 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3038,7 +3038,7 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc
for (i = 0; i < list->count; ++i) if (block == list->blocks[i]) - return VKD3D_OK; + return VKD3D_FALSE;
if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) { @@ -3103,6 +3103,9 @@ struct vsir_cfg struct vsir_block *entry; size_t block_count; struct vkd3d_string_buffer debug_buffer; + + struct vsir_block_list *loops; + size_t loops_count, loops_capacity; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3112,7 +3115,11 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) for (i = 0; i < cfg->block_count; ++i) vsir_block_cleanup(&cfg->blocks[i]);
+ for (i = 0; i < cfg->loops_count; ++i) + vsir_block_list_cleanup(&cfg->loops[i]); + vkd3d_free(cfg->blocks); + vkd3d_free(cfg->loops);
if (TRACE_ON()) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); @@ -3334,6 +3341,84 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) } }
+/* A back edge is an edge X -> Y for which block Y dominates block + * X. All the other edges are forward edges, and it is required that + * the input CFG is reducible, i.e., it is acyclic once you strip away + * the back edges. + * + * Each back edge X -> Y defines a loop: block X is the header block, + * block Y is the back edge block, and the loop consists of all the + * blocks which are dominated by the header block and have a path to + * the back edge block that doesn't pass through the header block + * (including the header block itself). It can be proved that all the + * blocks in such a path (connecting a loop block to the back edge + * block without passing through the header block) belong to the same + * loop. + * + * If the input CFG is reducible, each two loops are either disjoint + * or one is a strict subset of the other, provided that each block + * has at most one incoming back edge. If this condition does not + * hold, a synthetic block can be introduced as the only back edge + * block for the given header block, with all the previous back edge + * now being forward edges to the synthetic block. This is not + * currently implemented (but it is rarely found in practice + * anyway). */ +static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, + struct vsir_block *header) +{ + enum vkd3d_result ret; + size_t i; + + if ((ret = vsir_block_list_add(loop, block)) < 0) + return ret; + + if (ret == VKD3D_FALSE || block == header) + return VKD3D_OK; + + for (i = 0; i < block->predecessors.count; ++i) + { + if ((ret = vsir_cfg_scan_loop(loop, block->predecessors.blocks[i], header)) < 0) + return ret; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) +{ + size_t i, j; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *header = block->successors.blocks[j]; + struct vsir_block_list *loop; + enum vkd3d_result ret; + + /* Is this a back edge? */ + if (!vsir_block_dominates(header, block)) + continue; + + if (!vkd3d_array_reserve((void **)&cfg->loops, &cfg->loops_capacity, cfg->loops_count + 1, sizeof(*cfg->loops))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + loop = &cfg->loops[cfg->loops_count++]; + vsir_block_list_init(loop); + + if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) + return ret; + } + } + + return VKD3D_OK; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -3360,6 +3445,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
vsir_cfg_compute_dominators(&cfg);
+ if ((result = vsir_cfg_compute_loops(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = simple_structurizer_run(parser)) < 0) { vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
This is meant only to emit error messages. All accesses to the program should happen through the `program' field. --- libs/vkd3d-shader/ir.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 05997241f..dcce1ec38 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3098,6 +3098,7 @@ static void vsir_block_cleanup(struct vsir_block *block)
struct vsir_cfg { + struct vkd3d_shader_parser *parser; struct vsir_program *program; struct vsir_block *blocks; struct vsir_block *entry; @@ -3185,15 +3186,16 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) TRACE("}\n"); }
-static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) +static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vkd3d_shader_parser *parser) { struct vsir_block *current_block = NULL; enum vkd3d_result ret; size_t i;
memset(cfg, 0, sizeof(*cfg)); - cfg->program = program; - cfg->block_count = program->block_count; + cfg->parser = parser; + cfg->program = &parser->program; + cfg->block_count = cfg->program->block_count;
if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -3201,9 +3203,9 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program if (TRACE_ON()) vkd3d_string_buffer_init(&cfg->debug_buffer);
- for (i = 0; i < program->instructions.count; ++i) + for (i = 0; i < cfg->program->instructions.count; ++i) { - struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; + struct vkd3d_shader_instruction *instruction = &cfg->program->instructions.elements[i];
switch (instruction->handler_idx) { @@ -3220,9 +3222,9 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program assert(label <= cfg->block_count); current_block = &cfg->blocks[label - 1]; assert(current_block->label == 0); - if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) + if ((ret = vsir_block_init(current_block, label, cfg->program->block_count)) < 0) goto fail; - current_block->begin = &program->instructions.elements[i + 1]; + current_block->begin = &cfg->program->instructions.elements[i + 1]; if (!cfg->entry) cfg->entry = current_block; break; @@ -3440,7 +3442,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, if ((result = materialize_ssas_to_temps(parser)) < 0) return result;
- if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) + if ((result = vsir_cfg_init(&cfg, parser)) < 0) return result;
vsir_cfg_compute_dominators(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index dcce1ec38..37b6e9739 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3107,6 +3107,7 @@ struct vsir_cfg
struct vsir_block_list *loops; size_t loops_count, loops_capacity; + struct vsir_block_list **loops_by_header; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3121,6 +3122,7 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg)
vkd3d_free(cfg->blocks); vkd3d_free(cfg->loops); + vkd3d_free(cfg->loops_by_header);
if (TRACE_ON()) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); @@ -3390,6 +3392,9 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) { size_t i, j;
+ if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; @@ -3415,6 +3420,16 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg)
if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) return ret; + + if (cfg->loops_by_header[header->label - 1]) + { + FIXME("Block %u is header to more than one loop, this is not implemented.", header->label); + vkd3d_shader_parser_error(cfg->parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Block %u is header to more than one loop, this is not implemented.", header->label); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + cfg->loops_by_header[header->label - 1] = loop; } }
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 37b6e9739..33ada60c8 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3390,7 +3390,7 @@ static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct
static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) { - size_t i, j; + size_t i, j, k;
if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -3421,6 +3421,17 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) return ret;
+ if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); + + for (k = 0; k < loop->count; ++k) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + if (cfg->loops_by_header[header->label - 1]) { FIXME("Block %u is header to more than one loop, this is not implemented.", header->label);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 33ada60c8..4c2e2ad77 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3096,6 +3096,19 @@ static void vsir_block_cleanup(struct vsir_block *block) vkd3d_free(block->dominates); }
+static int block_compare(const void *ptr1, const void *ptr2) +{ + const struct vsir_block *block1 = *(const struct vsir_block **)ptr1; + const struct vsir_block *block2 = *(const struct vsir_block **)ptr2; + + return vkd3d_u32_compare(block1->label, block2->label); +} + +static void vsir_block_list_sort(struct vsir_block_list *list) +{ + qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + struct vsir_cfg { struct vkd3d_shader_parser *parser; @@ -3421,6 +3434,8 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) return ret;
+ vsir_block_list_sort(loop); + if (TRACE_ON()) { vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 4c2e2ad77..48a1130c5 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3032,14 +3032,8 @@ static void vsir_block_list_cleanup(struct vsir_block_list *list) vkd3d_free(list->blocks); }
-static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +static enum vkd3d_result vsir_block_list_add_unchecked(struct vsir_block_list *list, struct vsir_block *block) { - size_t i; - - for (i = 0; i < list->count; ++i) - if (block == list->blocks[i]) - return VKD3D_FALSE; - if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) { ERR("Cannot extend block list.\n"); @@ -3051,6 +3045,17 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc return VKD3D_OK; }
+static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + if (block == list->blocks[i]) + return VKD3D_FALSE; + + return vsir_block_list_add_unchecked(list, block); +} + struct vsir_block { unsigned int label;
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 135 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 48a1130c5..1abe3a7a5 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3056,6 +3056,13 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc return vsir_block_list_add_unchecked(list, block); }
+/* It is guaranteed that the relative order is kept. */ +static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) +{ + memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx - 1) * sizeof(*list->blocks)); + --list->count; +} + struct vsir_block { unsigned int label; @@ -3126,6 +3133,8 @@ struct vsir_cfg struct vsir_block_list *loops; size_t loops_count, loops_capacity; struct vsir_block_list **loops_by_header; + + struct vsir_block_list order; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3138,6 +3147,8 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) for (i = 0; i < cfg->loops_count; ++i) vsir_block_list_cleanup(&cfg->loops[i]);
+ vsir_block_list_cleanup(&cfg->order); + vkd3d_free(cfg->blocks); vkd3d_free(cfg->loops); vkd3d_free(cfg->loops_by_header); @@ -3217,6 +3228,8 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vkd3d_shader cfg->program = &parser->program; cfg->block_count = cfg->program->block_count;
+ vsir_block_list_init(&cfg->order); + if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) return VKD3D_ERROR_OUT_OF_MEMORY;
@@ -3467,6 +3480,122 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) return VKD3D_OK; }
+struct vsir_cfg_node_sorter +{ + struct vsir_cfg *cfg; + struct vsir_block_list available_blocks; +}; + +static enum vkd3d_result vsir_cfg_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) +{ + enum vkd3d_result ret; + + if ((ret = vsir_block_list_add_unchecked(&sorter->available_blocks, block)) < 0) + return ret; + + return VKD3D_OK; +} + +/* Topologically sort the blocks according to the forward edges. By + * definition if the input CFG is reducible then its forward edges + * form a DAG, so a topological sorting exists. In order to compute it + * we keep an array with the incoming degree for each block and an + * available list of all the blocks whose incoming degree has reached + * zero. At each step we pick a block from the available list and + * strip it away from the graph, updating the incoming degrees and + * available list. We prefer picking the most recently added block + * (i.e., from the end of the available list) because it will keep + * together related blocks in the order, which allow us to generate + * fewer control flow primitives. */ +static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; + unsigned int *in_degrees = NULL; + size_t i; + + if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + { + in_degrees[i] = UINT_MAX; + continue; + } + + in_degrees[i] = block->predecessors.count; + + /* Do not count back edges. */ + if (cfg->loops_by_header[i]) + { + assert(in_degrees[i] > 0); + in_degrees[i] -= 1; + } + + if (in_degrees[i] == 0 && block != cfg->entry) + { + ERR("Unexpected entry point %u.\n", block->label); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + } + + if (in_degrees[cfg->entry->label - 1] != 0) + { + ERR("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + vsir_block_list_init(&sorter.available_blocks); + + if ((ret = vsir_cfg_make_node_available(&sorter, cfg->entry)) < 0) + goto fail; + + while (sorter.available_blocks.count != 0) + { + struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1]; + + vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1); + if ((ret = vsir_block_list_add_unchecked(&cfg->order, block)) < 0) + goto fail; + + /* Remove (forward) edges and make new nodes available. */ + for (i = 0; i < block->successors.count; ++i) + { + struct vsir_block *successor = block->successors.blocks[i]; + + if (vsir_block_dominates(successor, block)) + continue; + + assert(in_degrees[successor->label - 1] > 0); + --in_degrees[successor->label - 1]; + + if (in_degrees[successor->label - 1] == 0) + { + if ((ret = vsir_cfg_make_node_available(&sorter, successor)) < 0) + goto fail; + } + } + } + + vkd3d_free(in_degrees); + vsir_block_list_cleanup(&sorter.available_blocks); + + return VKD3D_OK; + +fail: + vkd3d_free(in_degrees); + vsir_block_list_cleanup(&sorter.available_blocks); + vsir_block_list_cleanup(&cfg->order); + + return ret; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -3499,6 +3628,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, return result; }
+ if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = simple_structurizer_run(parser)) < 0) { vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 1abe3a7a5..77a34eb73 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3586,6 +3586,17 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) vkd3d_free(in_degrees); vsir_block_list_cleanup(&sorter.available_blocks);
+ if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); + + for (i = 0; i < cfg->order.count; ++i) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + return VKD3D_OK;
fail:
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 118 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 77a34eb73..eb53873b6 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3063,6 +3063,25 @@ static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t id --list->count; }
+/* It is assumed that both lists are sorted. */ +static bool vsir_block_list_is_subset(struct vsir_block_list *subset, struct vsir_block_list *superset) +{ + size_t i, j; + + if (subset->count > superset->count) + return false; + + for (i = 0, j = 0; i < subset->count; ++i) + { + for (; j < superset->count && subset->blocks[i] != superset->blocks[j]; ++j); + + if (j == superset->count) + return false; + } + + return true; +} + struct vsir_block { unsigned int label; @@ -3121,6 +3140,11 @@ static void vsir_block_list_sort(struct vsir_block_list *list) qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); }
+static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block) +{ + return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + struct vsir_cfg { struct vkd3d_shader_parser *parser; @@ -3483,16 +3507,45 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) struct vsir_cfg_node_sorter { struct vsir_cfg *cfg; + struct vsir_cfg_node_sorter_stack_item + { + struct vsir_block_list *loop; + unsigned int seen_count; + } *stack; + size_t stack_count, stack_capacity; struct vsir_block_list available_blocks; };
static enum vkd3d_result vsir_cfg_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) { + struct vsir_block_list *loop = sorter->cfg->loops_by_header[block->label - 1]; + struct vsir_cfg_node_sorter_stack_item *item; enum vkd3d_result ret;
if ((ret = vsir_block_list_add_unchecked(&sorter->available_blocks, block)) < 0) return ret;
+ if (!loop) + return VKD3D_OK; + + if (!vkd3d_array_reserve((void **)&sorter->stack, &sorter->stack_capacity, sorter->stack_count + 1, sizeof(*sorter->stack))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (sorter->stack_count > 0) + { + struct vsir_block_list *prev_loop = sorter->stack[sorter->stack_count - 1].loop; + + if (!vsir_block_list_is_subset(loop, prev_loop)) + { + ERR("Loops are not nested.\n"); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + } + + item = &sorter->stack[sorter->stack_count++]; + item->loop = loop; + item->seen_count = 0; + return VKD3D_OK; }
@@ -3503,10 +3556,23 @@ static enum vkd3d_result vsir_cfg_make_node_available(struct vsir_cfg_node_sorte * available list of all the blocks whose incoming degree has reached * zero. At each step we pick a block from the available list and * strip it away from the graph, updating the incoming degrees and - * available list. We prefer picking the most recently added block - * (i.e., from the end of the available list) because it will keep - * together related blocks in the order, which allow us to generate - * fewer control flow primitives. */ + * available list. + * + * In principle at each step we can pick whatever node we want from + * the available list, and will get a topological sort + * anyway. However, we use these two criteria to give to the computed + * order additional properties: + * + * 1. we keep track of which loops we're into, and pick blocks + * belonging to the current innermost loop, so that loops are kept + * contiguous in the order; this can always be done when the input + * CFG is reducible; + * + * 2. subject to the requirement above, we always pick the most + * recently added block to the available list, because this tends + * to keep related blocks and require fewer control flow + * primitives. + */ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -3558,12 +3624,48 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
while (sorter.available_blocks.count != 0) { - struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1]; + struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL; + struct vsir_block *block; + size_t new_seen_count; + + if (sorter.stack_count != 0) + inner_stack_item = &sorter.stack[sorter.stack_count - 1]; + + for (i = sorter.available_blocks.count - 1; ; --i) + { + if (i == SIZE_MAX) + { + ERR("Couldn't find any viable next block, is the input CFG reducible?\n"); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + }
- vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1); + block = sorter.available_blocks.blocks[i]; + + if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block)) + break; + } + + vsir_block_list_remove_index(&sorter.available_blocks, i); if ((ret = vsir_block_list_add_unchecked(&cfg->order, block)) < 0) goto fail;
+ /* Close loops. */ + new_seen_count = 1; + while (inner_stack_item) + { + inner_stack_item->seen_count += new_seen_count; + + assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); + if (inner_stack_item->seen_count != inner_stack_item->loop->count) + break; + + new_seen_count = inner_stack_item->loop->count; + --sorter.stack_count; + + inner_stack_item = sorter.stack_count ? &sorter.stack[sorter.stack_count - 1] : NULL; + } + /* Remove (forward) edges and make new nodes available. */ for (i = 0; i < block->successors.count; ++i) { @@ -3583,7 +3685,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) } }
+ assert(sorter.stack_count == 0); + vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); vsir_block_list_cleanup(&sorter.available_blocks);
if (TRACE_ON()) @@ -3601,6 +3706,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
fail: vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); vsir_block_list_cleanup(&sorter.available_blocks); vsir_block_list_cleanup(&cfg->order);
-static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +static enum vkd3d_result vsir_block_list_add_unchecked(struct vsir_block_list *list, struct vsir_block *block)
Does that mean a caller doesn't need to do checks when calling vsir_block_list_add_unchecked()? :) I can certainly see the logic behind calling this _unchecked, but it seems a bit ambiguous.
Note that in case of e.g. d3d12_command_queue_submit_locked(), the _locked suffix means the caller locks; if we were to be consistent with that naming convention, we'd call this vsir_block_list_add_checked().