~~This applies on top of !662; the last four commits belong here.~~
Here we compute a topological order (plus some additional properties) of the shader blocks. Eventually the blocks will be emitted precisely in this order, after having been enriched with structured control flow information.
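For a concrete (made-up) example: in a diamond-shaped CFG where block 1 branches to blocks 2 and 3, and both of those jump to block 4, any topological order of the forward edges begins with 1 and ends with 4 (e.g. `1 2 3 4` or `1 3 2 4`); the structurizer can then wrap blocks 2 and 3 in an if/else construct without ever needing a backward jump in the emitted order.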
-- v9:
* vkd3d-shader/ir: Keep loops contiguous in the topological sort.
* vkd3d-shader/ir: Dump the topological order of the control flow graph.
* vkd3d-shader/ir: Topologically sort the control flow graph.
* vkd3d-shader/ir: Allow adding to a block list without checking for duplicates.
From: Giovanni Mascellani <gmascellani@codeweavers.com>
Subject: vkd3d-shader/ir: Allow adding to a block list without checking for duplicates.
---
 libs/vkd3d-shader/ir.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c
index ac4a1b18e..885c3fd92 100644
--- a/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d-shader/ir.c
@@ -3032,14 +3032,8 @@ static void vsir_block_list_cleanup(struct vsir_block_list *list)
     vkd3d_free(list->blocks);
 }
 
-static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block)
+static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block)
 {
-    size_t i;
-
-    for (i = 0; i < list->count; ++i)
-        if (block == list->blocks[i])
-            return VKD3D_FALSE;
-
     if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks)))
     {
         ERR("Cannot extend block list.\n");
@@ -3051,6 +3045,17 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc
     return VKD3D_OK;
 }
 
+static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block)
+{
+    size_t i;
+
+    for (i = 0; i < list->count; ++i)
+        if (block == list->blocks[i])
+            return VKD3D_FALSE;
+
+    return vsir_block_list_add_checked(list, block);
+}
+
 struct vsir_block
 {
     unsigned int label;
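As an aside on naming: the `_checked` suffix means the *caller* has already checked (or otherwise guarantees) that the block is not in the list, so the helper appends unconditionally. A minimal sketch of the same split on a generic pointer list, with made-up names and return conventions, just to illustrate the pattern (not vkd3d code):

```c
#include <stdlib.h>

struct ptr_list
{
    void **items;
    size_t count, capacity;
};

/* Append unconditionally; the caller guarantees the item is not already present. */
static int ptr_list_add_checked(struct ptr_list *list, void *item)
{
    if (list->count == list->capacity)
    {
        size_t new_capacity = list->capacity ? 2 * list->capacity : 4;
        void **new_items = realloc(list->items, new_capacity * sizeof(*new_items));

        if (!new_items)
            return -1; /* Out of memory. */
        list->items = new_items;
        list->capacity = new_capacity;
    }

    list->items[list->count++] = item;
    return 0;
}

/* Append only if the item is not already in the list. */
static int ptr_list_add(struct ptr_list *list, void *item)
{
    size_t i;

    for (i = 0; i < list->count; ++i)
        if (list->items[i] == item)
            return 1; /* Already present; nothing to do. */

    return ptr_list_add_checked(list, item);
}
```

Callers that can prove uniqueness (such as the available list in the next patch, where a block's in-degree reaches zero exactly once) can use the cheaper entry point directly.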
From: Giovanni Mascellani <gmascellani@codeweavers.com>
Subject: vkd3d-shader/ir: Topologically sort the control flow graph.
---
 libs/vkd3d-shader/ir.c | 148 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c
index 885c3fd92..f6b6a4997 100644
--- a/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d-shader/ir.c
@@ -3056,6 +3056,13 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc
     return vsir_block_list_add_checked(list, block);
 }
 
+/* It is guaranteed that the relative order is kept. */
+static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx)
+{
+    --list->count;
+    memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx) * sizeof(*list->blocks));
+}
+
 struct vsir_block
 {
     unsigned int label;
@@ -3126,6 +3133,8 @@ struct vsir_cfg
     struct vsir_block_list *loops;
     size_t loops_count, loops_capacity;
     size_t *loops_by_header;
+
+    struct vsir_block_list order;
 };
 
 static void vsir_cfg_cleanup(struct vsir_cfg *cfg)
@@ -3138,6 +3147,8 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg)
     for (i = 0; i < cfg->loops_count; ++i)
         vsir_block_list_cleanup(&cfg->loops[i]);
 
+    vsir_block_list_cleanup(&cfg->order);
+
     vkd3d_free(cfg->blocks);
     vkd3d_free(cfg->loops);
     vkd3d_free(cfg->loops_by_header);
@@ -3218,6 +3229,8 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program
     cfg->program = program;
     cfg->block_count = program->block_count;
 
+    vsir_block_list_init(&cfg->order);
+
     if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks))))
         return VKD3D_ERROR_OUT_OF_MEMORY;
 
@@ -3471,6 +3484,135 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg)
     return VKD3D_OK;
 }
 
+struct vsir_cfg_node_sorter
+{
+    struct vsir_cfg *cfg;
+    struct vsir_block_list available_blocks;
+};
+
+static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block)
+{
+    enum vkd3d_result ret;
+
+    if ((ret = vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0)
+        return ret;
+
+    return VKD3D_OK;
+}
+
+/* Topologically sort the blocks according to the forward edges. By
+ * definition if the input CFG is reducible then its forward edges
+ * form a DAG, so a topological sorting exists. In order to compute it
+ * we keep an array with the incoming degree for each block and an
+ * available list of all the blocks whose incoming degree has reached
+ * zero. At each step we pick a block from the available list and
+ * strip it away from the graph, updating the incoming degrees and
+ * available list. We prefer picking the most recently added block
+ * (i.e., from the end of the available list) because it will keep
+ * together related blocks in the order, which allow us to generate
+ * fewer control flow primitives. */
+static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
+{
+    struct vsir_cfg_node_sorter sorter = { .cfg = cfg };
+    unsigned int *in_degrees = NULL;
+    enum vkd3d_result ret;
+    size_t i;
+
+    if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees))))
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+
+    for (i = 0; i < cfg->block_count; ++i)
+    {
+        struct vsir_block *block = &cfg->blocks[i];
+
+        if (block->label == 0)
+        {
+            in_degrees[i] = UINT_MAX;
+            continue;
+        }
+
+        in_degrees[i] = block->predecessors.count;
+
+        /* Do not count back edges. */
+        if (cfg->loops_by_header[i] != SIZE_MAX)
+        {
+            assert(in_degrees[i] > 0);
+            in_degrees[i] -= 1;
+        }
+
+        if (in_degrees[i] == 0 && block != cfg->entry)
+        {
+            WARN("Unexpected entry point %u.\n", block->label);
+            vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
+                    "Block %u is unreachable from the entry point.", block->label);
+            ret = VKD3D_ERROR_INVALID_SHADER;
+            goto fail;
+        }
+    }
+
+    if (in_degrees[cfg->entry->label - 1] != 0)
+    {
+        WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]);
+        vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
+                "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]);
+        ret = VKD3D_ERROR_INVALID_SHADER;
+        goto fail;
+    }
+
+    vsir_block_list_init(&sorter.available_blocks);
+
+    if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, cfg->entry)) < 0)
+        goto fail;
+
+    while (sorter.available_blocks.count != 0)
+    {
+        struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1];
+
+        vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1);
+        if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0)
+            goto fail;
+
+        /* Remove (forward) edges and make new nodes available. */
+        for (i = 0; i < block->successors.count; ++i)
+        {
+            struct vsir_block *successor = block->successors.blocks[i];
+
+            if (vsir_block_dominates(successor, block))
+                continue;
+
+            assert(in_degrees[successor->label - 1] > 0);
+            --in_degrees[successor->label - 1];
+
+            if (in_degrees[successor->label - 1] == 0)
+            {
+                if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, successor)) < 0)
+                    goto fail;
+            }
+        }
+    }
+
+    if (cfg->order.count != cfg->block_count)
+    {
+        /* There is a cycle of forward edges. */
+        WARN("The control flow graph is not reducible.\n");
+        vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
+                "The control flow graph is not reducible.");
+        ret = VKD3D_ERROR_INVALID_SHADER;
+        goto fail;
+    }
+
+    vkd3d_free(in_degrees);
+    vsir_block_list_cleanup(&sorter.available_blocks);
+
+    return VKD3D_OK;
+
+fail:
+    vkd3d_free(in_degrees);
+    vsir_block_list_cleanup(&sorter.available_blocks);
+
+    return ret;
+}
+
 enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
         const struct vkd3d_shader_compile_info *compile_info)
 {
@@ -3503,6 +3645,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
             return result;
         }
 
+        if ((result = vsir_cfg_sort_nodes(&cfg)) < 0)
+        {
+            vsir_cfg_cleanup(&cfg);
+            return result;
+        }
+
         if ((result = simple_structurizer_run(parser)) < 0)
        {
             vsir_cfg_cleanup(&cfg);
             return result;
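For reference, the sorting loop above is Kahn's algorithm restricted to the forward edges. Below is a self-contained sketch of the plain algorithm on a small hard-coded graph; every name and the adjacency-matrix representation are illustrative only, not the vkd3d-shader data structures, and it omits the patch's handling of back edges, unreachable blocks and error reporting. The available list is used as a stack, matching the preference for the most recently added block:

```c
#include <assert.h>
#include <stdio.h>

#define MAX_NODES 16

/* Adjacency matrix of forward edges: edge[u][v] != 0 means u -> v. */
static int edge[MAX_NODES][MAX_NODES];

/* Kahn's algorithm: repeatedly emit a node whose incoming degree has
 * reached zero.  Using the available list as a stack (LIFO) tends to
 * keep a node's successors close to it in the output order. */
static void topo_sort(unsigned int node_count, unsigned int *order)
{
    unsigned int in_degree[MAX_NODES] = {0};
    unsigned int available[MAX_NODES];
    unsigned int available_count = 0, order_count = 0;
    unsigned int u, v;

    for (u = 0; u < node_count; ++u)
        for (v = 0; v < node_count; ++v)
            if (edge[u][v])
                ++in_degree[v];

    for (u = 0; u < node_count; ++u)
        if (in_degree[u] == 0)
            available[available_count++] = u;

    while (available_count)
    {
        /* Pick the most recently added node and strip it from the graph. */
        u = available[--available_count];
        order[order_count++] = u;

        for (v = 0; v < node_count; ++v)
        {
            if (!edge[u][v])
                continue;
            if (--in_degree[v] == 0)
                available[available_count++] = v;
        }
    }

    /* If not all nodes were emitted, the graph has a cycle. */
    assert(order_count == node_count);
}

int main(void)
{
    unsigned int order[4];
    unsigned int i;

    /* 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3 (a simple diamond). */
    edge[0][1] = edge[0][2] = edge[1][3] = edge[2][3] = 1;

    topo_sort(4, order);

    for (i = 0; i < 4; ++i)
        printf("%u ", order[i]);
    printf("\n");
    return 0;
}
```

On this hard-coded diamond the program prints `0 2 1 3`, one of its valid topological orders.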
From: Giovanni Mascellani <gmascellani@codeweavers.com>
Subject: vkd3d-shader/ir: Dump the topological order of the control flow graph.
---
 libs/vkd3d-shader/ir.c | 11 +++++++++++
 1 file changed, 11 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c
index f6b6a4997..894324c5f 100644
--- a/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d-shader/ir.c
@@ -3604,6 +3604,17 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
     vkd3d_free(in_degrees);
     vsir_block_list_cleanup(&sorter.available_blocks);
 
+    if (TRACE_ON())
+    {
+        vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:");
+
+        for (i = 0; i < cfg->order.count; ++i)
+            vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label);
+
+        TRACE("%s\n", cfg->debug_buffer.buffer);
+        vkd3d_string_buffer_clear(&cfg->debug_buffer);
+    }
+
     return VKD3D_OK;
 
 fail:
From: Giovanni Mascellani <gmascellani@codeweavers.com>
Subject: vkd3d-shader/ir: Keep loops contiguous in the topological sort.
---
 libs/vkd3d-shader/ir.c | 94 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 88 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c
index 894324c5f..3c862f33e 100644
--- a/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d-shader/ir.c
@@ -3121,6 +3121,11 @@ static void vsir_block_list_sort(struct vsir_block_list *list)
     qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare);
 }
 
+static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block)
+{
+    return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare);
+}
+
 struct vsir_cfg
 {
     struct vkd3d_shader_message_context *message_context;
@@ -3487,16 +3492,37 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg)
 struct vsir_cfg_node_sorter
 {
     struct vsir_cfg *cfg;
+    struct vsir_cfg_node_sorter_stack_item
+    {
+        struct vsir_block_list *loop;
+        unsigned int seen_count;
+    } *stack;
+    size_t stack_count, stack_capacity;
     struct vsir_block_list available_blocks;
 };
 
 static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block)
 {
+    struct vsir_block_list *loop = NULL;
+    struct vsir_cfg_node_sorter_stack_item *item;
     enum vkd3d_result ret;
 
+    if (sorter->cfg->loops_by_header[block->label - 1] != SIZE_MAX)
+        loop = &sorter->cfg->loops[sorter->cfg->loops_by_header[block->label - 1]];
+
     if ((ret = vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0)
         return ret;
 
+    if (!loop)
+        return VKD3D_OK;
+
+    if (!vkd3d_array_reserve((void **)&sorter->stack, &sorter->stack_capacity, sorter->stack_count + 1, sizeof(*sorter->stack)))
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+
+    item = &sorter->stack[sorter->stack_count++];
+    item->loop = loop;
+    item->seen_count = 0;
+
     return VKD3D_OK;
 }
 
@@ -3507,10 +3533,23 @@ static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cf
  * available list of all the blocks whose incoming degree has reached
  * zero. At each step we pick a block from the available list and
  * strip it away from the graph, updating the incoming degrees and
- * available list. We prefer picking the most recently added block
- * (i.e., from the end of the available list) because it will keep
- * together related blocks in the order, which allow us to generate
- * fewer control flow primitives. */
+ * available list.
+ *
+ * In principle at each step we can pick whatever node we want from
+ * the available list, and will get a topological sort
+ * anyway. However, we use these two criteria to give to the computed
+ * order additional properties:
+ *
+ * 1. we keep track of which loops we're into, and pick blocks
+ *    belonging to the current innermost loop, so that loops are kept
+ *    contiguous in the order; this can always be done when the input
+ *    CFG is reducible;
+ *
+ * 2. subject to the requirement above, we always pick the most
+ *    recently added block to the available list, because this tends
+ *    to keep related blocks and require fewer control flow
+ *    primitives.
+ */
 static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
 {
     struct vsir_cfg_node_sorter sorter = { .cfg = cfg };
@@ -3566,12 +3605,51 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
 
     while (sorter.available_blocks.count != 0)
     {
-        struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1];
+        struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL;
+        struct vsir_block *block;
+        size_t new_seen_count;
+
+        if (sorter.stack_count != 0)
+            inner_stack_item = &sorter.stack[sorter.stack_count - 1];
+
+        for (i = sorter.available_blocks.count - 1; ; --i)
+        {
+            if (i == SIZE_MAX)
+            {
+                ERR("Couldn't find any viable next block, is the input CFG reducible?\n");
+                ret = VKD3D_ERROR_INVALID_SHADER;
+                goto fail;
+            }
+
+            block = sorter.available_blocks.blocks[i];
+
+            if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block))
+                break;
+        }
 
-        vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1);
+        vsir_block_list_remove_index(&sorter.available_blocks, i);
         if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0)
             goto fail;
 
+        /* Close loops: since each loop is a strict subset of any
+         * outer loop, we just need to track how many blocks we've
+         * seen; when I close a loop I mark the same number of seen
+         * blocks for the next outer loop. */
+        new_seen_count = 1;
+        while (sorter.stack_count != 0)
+        {
+            inner_stack_item = &sorter.stack[sorter.stack_count - 1];
+
+            inner_stack_item->seen_count += new_seen_count;
+
+            assert(inner_stack_item->seen_count <= inner_stack_item->loop->count);
+            if (inner_stack_item->seen_count != inner_stack_item->loop->count)
+                break;
+
+            new_seen_count = inner_stack_item->loop->count;
+            --sorter.stack_count;
+        }
+
         /* Remove (forward) edges and make new nodes available. */
         for (i = 0; i < block->successors.count; ++i)
         {
@@ -3601,7 +3679,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
         goto fail;
     }
 
+    assert(sorter.stack_count == 0);
+
     vkd3d_free(in_degrees);
+    vkd3d_free(sorter.stack);
     vsir_block_list_cleanup(&sorter.available_blocks);
 
     if (TRACE_ON())
@@ -3619,6 +3700,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
 
 fail:
     vkd3d_free(in_degrees);
+    vkd3d_free(sorter.stack);
     vsir_block_list_cleanup(&sorter.available_blocks);
 
     return ret;
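To see the effect of the loop stack on a made-up CFG: take forward edges 1→2, 1→4, 2→3, 2→4 and a back edge 3→2, so that {2, 3} is a loop with header 2. After block 1 is emitted only block 2 is available (block 4 still has one unsatisfied forward edge); emitting 2 then makes both 3 and 4 available. A purely LIFO pick could take 4 next and produce the order 1 2 4 3, splitting the loop around an unrelated block. With this patch, making header 2 available pushes the loop {2, 3} onto the stack, and until its seen_count reaches the loop's size only blocks of that loop may be picked; 3 is therefore chosen before 4 and the order becomes 1 2 3 4, keeping the loop contiguous.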
This merge request was approved by Giovanni Mascellani.
This merge request was approved by Henri Verbeet.