This applies on top of !662, the last four commits belong here.
Here we compute a topological order (plus some additional properties) of the shader blocks. Eventually the blocks will be emitted precisely in this order, after having been enriched with structured control flow information.
-- v4: vkd3d-shader/ir: Keep loops contiguous in the topological sort. vkd3d-shader/ir: Dump the topological order of the control flow graph. vkd3d-shader/ir: Topologically sort the control flow graph. vkd3d-shader/ir: Allow adding to a block list without checking for duplicates.
From: Giovanni Mascellani gmascellani@codeweavers.com
They have to be considered code rather than declarations, as required for instance by the SPIR-V backend. --- libs/vkd3d-shader/ir.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index f0bd85338..a379ba6dd 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2041,12 +2041,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte flattener->location = instruction->location;
/* Declarations should occur before the first code block, which in hull shaders is marked by the first - * phase instruction, and in all other shader types begins with the first label instruction. */ - if (!after_declarations_section && !vsir_instruction_is_dcl(instruction) - && instruction->handler_idx != VKD3DSIH_NOP) + * phase instruction, and in all other shader types begins with the first label instruction. + * Declaring an indexable temp with function scope is not considered a declaration, + * because it needs to live inside a function. */ + if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) { - after_declarations_section = true; - cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); + bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP + && instruction->declaration.indexable_temp.has_function_scope; + + if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) + { + after_declarations_section = true; + cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); + } }
cf_info = flattener->control_flow_depth
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index a379ba6dd..2e05d19ca 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3102,6 +3102,7 @@ struct vsir_cfg struct vsir_block *blocks; struct vsir_block *entry; size_t block_count; + struct vkd3d_string_buffer debug_buffer; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3112,6 +3113,9 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) vsir_block_cleanup(&cfg->blocks[i]);
vkd3d_free(cfg->blocks); + + if (TRACE_ON()) + vkd3d_string_buffer_cleanup(&cfg->debug_buffer); }
static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, @@ -3182,6 +3186,9 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) return VKD3D_ERROR_OUT_OF_MEMORY;
+ if (TRACE_ON()) + vkd3d_string_buffer_init(&cfg->debug_buffer); + for (i = 0; i < program->instructions.count; ++i) { struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; @@ -3292,12 +3299,8 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru
static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) { - struct vkd3d_string_buffer buf; size_t i, j;
- if (TRACE_ON()) - vkd3d_string_buffer_init(&buf); - for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; @@ -3309,7 +3312,7 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg)
if (TRACE_ON()) { - vkd3d_string_buffer_printf(&buf, "Block %u dominates:", block->label); + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates:", block->label); for (j = 0; j < cfg->block_count; j++) { struct vsir_block *block2 = &cfg->blocks[j]; @@ -3318,15 +3321,12 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) continue;
if (bitmap_is_set(block->dominates, j)) - vkd3d_string_buffer_printf(&buf, " %u", block2->label); + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); } - TRACE("%s\n", buf.buffer); - vkd3d_string_buffer_clear(&buf); + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); } } - - if (TRACE_ON()) - vkd3d_string_buffer_cleanup(&buf); }
enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 2e05d19ca..4ffdd0cb1 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3118,6 +3118,11 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); }
+static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) +{ + return bitmap_is_set(b1->dominates, b2->label - 1); +} + static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, struct vkd3d_shader_src_param *successor_param) { @@ -3320,7 +3325,7 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) if (block2->label == 0) continue;
- if (bitmap_is_set(block->dominates, j)) + if (vsir_block_dominates(block, block2)) vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); } TRACE("%s\n", cfg->debug_buffer.buffer);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- include/vkd3d_types.h | 2 + libs/vkd3d-shader/ir.c | 93 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/include/vkd3d_types.h b/include/vkd3d_types.h index 4a7aca236..9060fa087 100644 --- a/include/vkd3d_types.h +++ b/include/vkd3d_types.h @@ -41,6 +41,8 @@ enum vkd3d_result { /** Success. */ VKD3D_OK = 0, + /** Success as a result of there being nothing to do. */ + VKD3D_FALSE = 1, /** An unspecified failure occurred. */ VKD3D_ERROR = -1, /** There are not enough resources available to complete the operation. */ diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 4ffdd0cb1..05997241f 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3038,7 +3038,7 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc
for (i = 0; i < list->count; ++i) if (block == list->blocks[i]) - return VKD3D_OK; + return VKD3D_FALSE;
if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) { @@ -3103,6 +3103,9 @@ struct vsir_cfg struct vsir_block *entry; size_t block_count; struct vkd3d_string_buffer debug_buffer; + + struct vsir_block_list *loops; + size_t loops_count, loops_capacity; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3112,7 +3115,11 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) for (i = 0; i < cfg->block_count; ++i) vsir_block_cleanup(&cfg->blocks[i]);
+ for (i = 0; i < cfg->loops_count; ++i) + vsir_block_list_cleanup(&cfg->loops[i]); + vkd3d_free(cfg->blocks); + vkd3d_free(cfg->loops);
if (TRACE_ON()) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); @@ -3334,6 +3341,84 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) } }
+/* A back edge is an edge X -> Y for which block Y dominates block + * X. All the other edges are forward edges, and it is required that + * the input CFG is reducible, i.e., it is acyclic once you strip away + * the back edges. + * + * Each back edge X -> Y defines a loop: block X is the header block, + * block Y is the back edge block, and the loop consists of all the + * blocks which are dominated by the header block and have a path to + * the back edge block that doesn't pass through the header block + * (including the header block itself). It can be proved that all the + * blocks in such a path (connecting a loop block to the back edge + * block without passing through the header block) belong to the same + * loop. + * + * If the input CFG is reducible, each two loops are either disjoint + * or one is a strict subset of the other, provided that each block + * has at most one incoming back edge. If this condition does not + * hold, a synthetic block can be introduced as the only back edge + * block for the given header block, with all the previous back edge + * now being forward edges to the synthetic block. This is not + * currently implemented (but it is rarely found in practice + * anyway). */ +static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, + struct vsir_block *header) +{ + enum vkd3d_result ret; + size_t i; + + if ((ret = vsir_block_list_add(loop, block)) < 0) + return ret; + + if (ret == VKD3D_FALSE || block == header) + return VKD3D_OK; + + for (i = 0; i < block->predecessors.count; ++i) + { + if ((ret = vsir_cfg_scan_loop(loop, block->predecessors.blocks[i], header)) < 0) + return ret; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) +{ + size_t i, j; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *header = block->successors.blocks[j]; + struct vsir_block_list *loop; + enum vkd3d_result ret; + + /* Is this a back edge? */ + if (!vsir_block_dominates(header, block)) + continue; + + if (!vkd3d_array_reserve((void **)&cfg->loops, &cfg->loops_capacity, cfg->loops_count + 1, sizeof(*cfg->loops))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + loop = &cfg->loops[cfg->loops_count++]; + vsir_block_list_init(loop); + + if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) + return ret; + } + } + + return VKD3D_OK; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -3360,6 +3445,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
vsir_cfg_compute_dominators(&cfg);
+ if ((result = vsir_cfg_compute_loops(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = simple_structurizer_run(parser)) < 0) { vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 05997241f..8fb52175d 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3098,6 +3098,7 @@ static void vsir_block_cleanup(struct vsir_block *block)
struct vsir_cfg { + struct vkd3d_shader_message_context *message_context; struct vsir_program *program; struct vsir_block *blocks; struct vsir_block *entry; @@ -3185,13 +3186,15 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) TRACE("}\n"); }
-static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) +static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) { struct vsir_block *current_block = NULL; enum vkd3d_result ret; size_t i;
memset(cfg, 0, sizeof(*cfg)); + cfg->message_context = message_context; cfg->program = program; cfg->block_count = program->block_count;
@@ -3440,7 +3443,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, if ((result = materialize_ssas_to_temps(parser)) < 0) return result;
- if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) + if ((result = vsir_cfg_init(&cfg, &parser->program, parser->message_context)) < 0) return result;
vsir_cfg_compute_dominators(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 8fb52175d..907d4150d 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3107,6 +3107,7 @@ struct vsir_cfg
struct vsir_block_list *loops; size_t loops_count, loops_capacity; + struct vsir_block_list **loops_by_header; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3121,6 +3122,7 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg)
vkd3d_free(cfg->blocks); vkd3d_free(cfg->loops); + vkd3d_free(cfg->loops_by_header);
if (TRACE_ON()) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); @@ -3391,6 +3393,9 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) { size_t i, j;
+ if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; @@ -3416,6 +3421,16 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg)
if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) return ret; + + if (cfg->loops_by_header[header->label - 1]) + { + FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label); + vkd3d_shader_error(cfg->message_context, &header->begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Block %u is header to more than one loop, this is not implemented.", header->label); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + cfg->loops_by_header[header->label - 1] = loop; } }
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 907d4150d..70ca148c9 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3391,7 +3391,7 @@ static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct
static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) { - size_t i, j; + size_t i, j, k;
if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -3422,6 +3422,17 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) return ret;
+ if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); + + for (k = 0; k < loop->count; ++k) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + if (cfg->loops_by_header[header->label - 1]) { FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 70ca148c9..7e938d369 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3096,6 +3096,19 @@ static void vsir_block_cleanup(struct vsir_block *block) vkd3d_free(block->dominates); }
+static int block_compare(const void *ptr1, const void *ptr2) +{ + const struct vsir_block *block1 = *(const struct vsir_block **)ptr1; + const struct vsir_block *block2 = *(const struct vsir_block **)ptr2; + + return vkd3d_u32_compare(block1->label, block2->label); +} + +static void vsir_block_list_sort(struct vsir_block_list *list) +{ + qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + struct vsir_cfg { struct vkd3d_shader_message_context *message_context; @@ -3422,6 +3435,8 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) return ret;
+ vsir_block_list_sort(loop); + if (TRACE_ON()) { vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 7e938d369..c61aaced2 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3032,14 +3032,8 @@ static void vsir_block_list_cleanup(struct vsir_block_list *list) vkd3d_free(list->blocks); }
-static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block) { - size_t i; - - for (i = 0; i < list->count; ++i) - if (block == list->blocks[i]) - return VKD3D_FALSE; - if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) { ERR("Cannot extend block list.\n"); @@ -3051,6 +3045,17 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc return VKD3D_OK; }
+static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + if (block == list->blocks[i]) + return VKD3D_FALSE; + + return vsir_block_list_add_checked(list, block); +} + struct vsir_block { unsigned int label;
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 135 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index c61aaced2..549f6aedc 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3056,6 +3056,13 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc return vsir_block_list_add_checked(list, block); }
+/* It is guaranteed that the relative order is kept. */ +static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) +{ + memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx - 1) * sizeof(*list->blocks)); + --list->count; +} + struct vsir_block { unsigned int label; @@ -3126,6 +3133,8 @@ struct vsir_cfg struct vsir_block_list *loops; size_t loops_count, loops_capacity; struct vsir_block_list **loops_by_header; + + struct vsir_block_list order; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3138,6 +3147,8 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) for (i = 0; i < cfg->loops_count; ++i) vsir_block_list_cleanup(&cfg->loops[i]);
+ vsir_block_list_cleanup(&cfg->order); + vkd3d_free(cfg->blocks); vkd3d_free(cfg->loops); vkd3d_free(cfg->loops_by_header); @@ -3218,6 +3229,8 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program cfg->program = program; cfg->block_count = program->block_count;
+ vsir_block_list_init(&cfg->order); + if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) return VKD3D_ERROR_OUT_OF_MEMORY;
@@ -3468,6 +3481,122 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) return VKD3D_OK; }
+struct vsir_cfg_node_sorter +{ + struct vsir_cfg *cfg; + struct vsir_block_list available_blocks; +}; + +static enum vkd3d_result vsir_cfg_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) +{ + enum vkd3d_result ret; + + if ((ret = vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0) + return ret; + + return VKD3D_OK; +} + +/* Topologically sort the blocks according to the forward edges. By + * definition if the input CFG is reducible then its forward edges + * form a DAG, so a topological sorting exists. In order to compute it + * we keep an array with the incoming degree for each block and an + * available list of all the blocks whose incoming degree has reached + * zero. At each step we pick a block from the available list and + * strip it away from the graph, updating the incoming degrees and + * available list. We prefer picking the most recently added block + * (i.e., from the end of the available list) because it will keep + * together related blocks in the order, which allow us to generate + * fewer control flow primitives. */ +static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; + unsigned int *in_degrees = NULL; + size_t i; + + if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + { + in_degrees[i] = UINT_MAX; + continue; + } + + in_degrees[i] = block->predecessors.count; + + /* Do not count back edges. */ + if (cfg->loops_by_header[i]) + { + assert(in_degrees[i] > 0); + in_degrees[i] -= 1; + } + + if (in_degrees[i] == 0 && block != cfg->entry) + { + ERR("Unexpected entry point %u.\n", block->label); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + } + + if (in_degrees[cfg->entry->label - 1] != 0) + { + ERR("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + vsir_block_list_init(&sorter.available_blocks); + + if ((ret = vsir_cfg_make_node_available(&sorter, cfg->entry)) < 0) + goto fail; + + while (sorter.available_blocks.count != 0) + { + struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1]; + + vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1); + if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) + goto fail; + + /* Remove (forward) edges and make new nodes available. */ + for (i = 0; i < block->successors.count; ++i) + { + struct vsir_block *successor = block->successors.blocks[i]; + + if (vsir_block_dominates(successor, block)) + continue; + + assert(in_degrees[successor->label - 1] > 0); + --in_degrees[successor->label - 1]; + + if (in_degrees[successor->label - 1] == 0) + { + if ((ret = vsir_cfg_make_node_available(&sorter, successor)) < 0) + goto fail; + } + } + } + + vkd3d_free(in_degrees); + vsir_block_list_cleanup(&sorter.available_blocks); + + return VKD3D_OK; + +fail: + vkd3d_free(in_degrees); + vsir_block_list_cleanup(&sorter.available_blocks); + vsir_block_list_cleanup(&cfg->order); + + return ret; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -3500,6 +3629,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, return result; }
+ if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = simple_structurizer_run(parser)) < 0) { vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 549f6aedc..982cb029d 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3587,6 +3587,17 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) vkd3d_free(in_degrees); vsir_block_list_cleanup(&sorter.available_blocks);
+ if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); + + for (i = 0; i < cfg->order.count; ++i) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + return VKD3D_OK;
fail:
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 118 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 982cb029d..e9e88e283 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3063,6 +3063,25 @@ static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t id --list->count; }
+/* It is assumed that both lists are sorted. */ +static bool vsir_block_list_is_subset(struct vsir_block_list *subset, struct vsir_block_list *superset) +{ + size_t i, j; + + if (subset->count > superset->count) + return false; + + for (i = 0, j = 0; i < subset->count; ++i) + { + for (; j < superset->count && subset->blocks[i] != superset->blocks[j]; ++j); + + if (j == superset->count) + return false; + } + + return true; +} + struct vsir_block { unsigned int label; @@ -3121,6 +3140,11 @@ static void vsir_block_list_sort(struct vsir_block_list *list) qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); }
+static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block) +{ + return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + struct vsir_cfg { struct vkd3d_shader_message_context *message_context; @@ -3484,16 +3508,45 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) struct vsir_cfg_node_sorter { struct vsir_cfg *cfg; + struct vsir_cfg_node_sorter_stack_item + { + struct vsir_block_list *loop; + unsigned int seen_count; + } *stack; + size_t stack_count, stack_capacity; struct vsir_block_list available_blocks; };
static enum vkd3d_result vsir_cfg_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) { + struct vsir_block_list *loop = sorter->cfg->loops_by_header[block->label - 1]; + struct vsir_cfg_node_sorter_stack_item *item; enum vkd3d_result ret;
if ((ret = vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0) return ret;
+ if (!loop) + return VKD3D_OK; + + if (!vkd3d_array_reserve((void **)&sorter->stack, &sorter->stack_capacity, sorter->stack_count + 1, sizeof(*sorter->stack))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (sorter->stack_count > 0) + { + struct vsir_block_list *prev_loop = sorter->stack[sorter->stack_count - 1].loop; + + if (!vsir_block_list_is_subset(loop, prev_loop)) + { + ERR("Loops are not nested.\n"); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + } + + item = &sorter->stack[sorter->stack_count++]; + item->loop = loop; + item->seen_count = 0; + return VKD3D_OK; }
@@ -3504,10 +3557,23 @@ static enum vkd3d_result vsir_cfg_make_node_available(struct vsir_cfg_node_sorte * available list of all the blocks whose incoming degree has reached * zero. At each step we pick a block from the available list and * strip it away from the graph, updating the incoming degrees and - * available list. We prefer picking the most recently added block - * (i.e., from the end of the available list) because it will keep - * together related blocks in the order, which allow us to generate - * fewer control flow primitives. */ + * available list. + * + * In principle at each step we can pick whatever node we want from + * the available list, and will get a topological sort + * anyway. However, we use these two criteria to give to the computed + * order additional properties: + * + * 1. we keep track of which loops we're into, and pick blocks + * belonging to the current innermost loop, so that loops are kept + * contiguous in the order; this can always be done when the input + * CFG is reducible; + * + * 2. subject to the requirement above, we always pick the most + * recently added block to the available list, because this tends + * to keep related blocks and require fewer control flow + * primitives. + */ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -3559,12 +3625,48 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
while (sorter.available_blocks.count != 0) { - struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1]; + struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL; + struct vsir_block *block; + size_t new_seen_count; + + if (sorter.stack_count != 0) + inner_stack_item = &sorter.stack[sorter.stack_count - 1]; + + for (i = sorter.available_blocks.count - 1; ; --i) + { + if (i == SIZE_MAX) + { + ERR("Couldn't find any viable next block, is the input CFG reducible?\n"); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + }
- vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1); + block = sorter.available_blocks.blocks[i]; + + if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block)) + break; + } + + vsir_block_list_remove_index(&sorter.available_blocks, i); if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) goto fail;
+ /* Close loops. */ + new_seen_count = 1; + while (inner_stack_item) + { + inner_stack_item->seen_count += new_seen_count; + + assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); + if (inner_stack_item->seen_count != inner_stack_item->loop->count) + break; + + new_seen_count = inner_stack_item->loop->count; + --sorter.stack_count; + + inner_stack_item = sorter.stack_count ? &sorter.stack[sorter.stack_count - 1] : NULL; + } + /* Remove (forward) edges and make new nodes available. */ for (i = 0; i < block->successors.count; ++i) { @@ -3584,7 +3686,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) } }
+ assert(sorter.stack_count == 0); + vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); vsir_block_list_cleanup(&sorter.available_blocks);
if (TRACE_ON()) @@ -3602,6 +3707,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
fail: vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); vsir_block_list_cleanup(&sorter.available_blocks); vsir_block_list_cleanup(&cfg->order);
Does that mean a caller doesn't need to do checks when calling vsir_block_list_add_unchecked()? :) I can certainly see the logic behind calling this _unchecked, but it seems a bit ambiguous.
Yeah, I was aware of this ambiguity. Using `_checked` would have had the same ambiguity, though: it remains unclear whether the adjective applies to the work done by the callee or to the work the caller is supposed to do.
To me the actual ambiguity resolution lies with the fact that I expect the shorter function name to be the safer one, and the name with attributes to be the one that requires more care. But I'll admit that's my own thinking.
Note that in case of e.g. d3d12_command_queue_submit_locked(), the _locked suffix means the caller locks; if we were to be consistent with that naming convention, we'd call this vsir_block_list_add_checked().
Sure, ok to go with this convention.
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
return vsir_block_list_add_checked(list, block);
}
+/* It is guaranteed that the relative order is kept. */ +static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) +{
- memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx - 1) * sizeof(*list->blocks));
Or decrement `list_count` first, or use `(--list->count - idx)`
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
in_degrees[i] = UINT_MAX;
continue;
}
in_degrees[i] = block->predecessors.count;
/* Do not count back edges. */
if (cfg->loops_by_header[i])
{
assert(in_degrees[i] > 0);
in_degrees[i] -= 1;
}
if (in_degrees[i] == 0 && block != cfg->entry)
{
ERR("Unexpected entry point %u.\n", block->label);
Is this more accurately a `FIXME`? We don't normally emit `ERR` because a shader is invalid. Ditto for the entry point check after the loop.
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
+}
+/* Topologically sort the blocks according to the forward edges. By
- definition if the input CFG is reducible then its forward edges
- form a DAG, so a topological sorting exists. In order to compute it
- we keep an array with the incoming degree for each block and an
- available list of all the blocks whose incoming degree has reached
- zero. At each step we pick a block from the available list and
- strip it away from the graph, updating the incoming degrees and
- available list. We prefer picking the most recently added block
- (i.e., from the end of the available list) because it will keep
- together related blocks in the order, which allow us to generate
- fewer control flow primitives. */
+static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) +{
- enum vkd3d_result ret;
Needs to be on the line above `i` for correct formatting.
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
- {
ERR("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]);
ret = VKD3D_ERROR_INVALID_SHADER;
goto fail;
- }
- vsir_block_list_init(&sorter.available_blocks);
- if ((ret = vsir_cfg_make_node_available(&sorter, cfg->entry)) < 0)
goto fail;
- while (sorter.available_blocks.count != 0)
- {
struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1];
vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1);
Not necessarily an issue, but there's a mismatch here between how blocks are added to `sorter` and how they are removed. The former uses a helper function on `struct vsir_cfg_node_sorter`, while the latter accesses an internal member of the struct. I get that `vsir_cfg_make_node_available()` ends up doing a lot more than `vsir_block_list_remove_index()` though. Also, do we need the `vsir_` prefix on `vsir_cfg_make_node_available()`, and should it be `cfg_node_sorter_make_node_available()`?
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
}
- }
- if (in_degrees[cfg->entry->label - 1] != 0)
- {
ERR("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]);
ret = VKD3D_ERROR_INVALID_SHADER;
goto fail;
- }
- vsir_block_list_init(&sorter.available_blocks);
- if ((ret = vsir_cfg_make_node_available(&sorter, cfg->entry)) < 0)
goto fail;
- while (sorter.available_blocks.count != 0)
Is it possible for any input to infinite loop here? (I don't know).
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
while (sorter.available_blocks.count != 0) {
struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1];
struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL;
struct vsir_block *block;
size_t new_seen_count;
if (sorter.stack_count != 0)
inner_stack_item = &sorter.stack[sorter.stack_count - 1];
for (i = sorter.available_blocks.count - 1; ; --i)
{
if (i == SIZE_MAX)
If you use `i = sorter.available_blocks.count;` and `if (i == 0)` we don't need to know the size of `i`.