This applies on top of !672, the last three commits belong here.
Here we compute additional "synthetic" loops that will be used to implement non-trivial forward edges using (possibly conditional) `break` instructions.
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index ac4a1b18e..885c3fd92 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3032,14 +3032,8 @@ static void vsir_block_list_cleanup(struct vsir_block_list *list) vkd3d_free(list->blocks); }
-static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block) { - size_t i; - - for (i = 0; i < list->count; ++i) - if (block == list->blocks[i]) - return VKD3D_FALSE; - if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) { ERR("Cannot extend block list.\n"); @@ -3051,6 +3045,17 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc return VKD3D_OK; }
+static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + if (block == list->blocks[i]) + return VKD3D_FALSE; + + return vsir_block_list_add_checked(list, block); +} + struct vsir_block { unsigned int label;
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 148 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 885c3fd92..f6b6a4997 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3056,6 +3056,13 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc return vsir_block_list_add_checked(list, block); }
+/* It is guaranteed that the relative order is kept. */ +static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) +{ + --list->count; + memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx) * sizeof(*list->blocks)); +} + struct vsir_block { unsigned int label; @@ -3126,6 +3133,8 @@ struct vsir_cfg struct vsir_block_list *loops; size_t loops_count, loops_capacity; size_t *loops_by_header; + + struct vsir_block_list order; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3138,6 +3147,8 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) for (i = 0; i < cfg->loops_count; ++i) vsir_block_list_cleanup(&cfg->loops[i]);
+ vsir_block_list_cleanup(&cfg->order); + vkd3d_free(cfg->blocks); vkd3d_free(cfg->loops); vkd3d_free(cfg->loops_by_header); @@ -3218,6 +3229,8 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program cfg->program = program; cfg->block_count = program->block_count;
+ vsir_block_list_init(&cfg->order); + if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) return VKD3D_ERROR_OUT_OF_MEMORY;
@@ -3471,6 +3484,135 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) return VKD3D_OK; }
+struct vsir_cfg_node_sorter +{ + struct vsir_cfg *cfg; + struct vsir_block_list available_blocks; +}; + +static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) +{ + enum vkd3d_result ret; + + if ((ret = vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0) + return ret; + + return VKD3D_OK; +} + +/* Topologically sort the blocks according to the forward edges. By + * definition if the input CFG is reducible then its forward edges + * form a DAG, so a topological sorting exists. In order to compute it + * we keep an array with the incoming degree for each block and an + * available list of all the blocks whose incoming degree has reached + * zero. At each step we pick a block from the available list and + * strip it away from the graph, updating the incoming degrees and + * available list. We prefer picking the most recently added block + * (i.e., from the end of the available list) because it will keep + * together related blocks in the order, which allow us to generate + * fewer control flow primitives. */ +static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) +{ + struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; + unsigned int *in_degrees = NULL; + enum vkd3d_result ret; + size_t i; + + if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + { + in_degrees[i] = UINT_MAX; + continue; + } + + in_degrees[i] = block->predecessors.count; + + /* Do not count back edges. */ + if (cfg->loops_by_header[i] != SIZE_MAX) + { + assert(in_degrees[i] > 0); + in_degrees[i] -= 1; + } + + if (in_degrees[i] == 0 && block != cfg->entry) + { + WARN("Unexpected entry point %u.\n", block->label); + vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Block %u is unreachable from the entry point.", block->label); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + } + + if (in_degrees[cfg->entry->label - 1] != 0) + { + WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); + vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + vsir_block_list_init(&sorter.available_blocks); + + if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, cfg->entry)) < 0) + goto fail; + + while (sorter.available_blocks.count != 0) + { + struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1]; + + vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1); + if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) + goto fail; + + /* Remove (forward) edges and make new nodes available. */ + for (i = 0; i < block->successors.count; ++i) + { + struct vsir_block *successor = block->successors.blocks[i]; + + if (vsir_block_dominates(successor, block)) + continue; + + assert(in_degrees[successor->label - 1] > 0); + --in_degrees[successor->label - 1]; + + if (in_degrees[successor->label - 1] == 0) + { + if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, successor)) < 0) + goto fail; + } + } + } + + if (cfg->order.count != cfg->block_count) + { + /* There is a cycle of forward edges. */ + WARN("The control flow graph is not reducible.\n"); + vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "The control flow graph is not reducible."); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + vkd3d_free(in_degrees); + vsir_block_list_cleanup(&sorter.available_blocks); + + return VKD3D_OK; + +fail: + vkd3d_free(in_degrees); + vsir_block_list_cleanup(&sorter.available_blocks); + + return ret; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -3503,6 +3645,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, return result; }
+ if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = simple_structurizer_run(parser)) < 0) { vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index f6b6a4997..894324c5f 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3604,6 +3604,17 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) vkd3d_free(in_degrees); vsir_block_list_cleanup(&sorter.available_blocks);
+ if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); + + for (i = 0; i < cfg->order.count; ++i) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + return VKD3D_OK;
fail:
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 94 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 894324c5f..3c862f33e 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3121,6 +3121,11 @@ static void vsir_block_list_sort(struct vsir_block_list *list) qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); }
+static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block) +{ + return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + struct vsir_cfg { struct vkd3d_shader_message_context *message_context; @@ -3487,16 +3492,37 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) struct vsir_cfg_node_sorter { struct vsir_cfg *cfg; + struct vsir_cfg_node_sorter_stack_item + { + struct vsir_block_list *loop; + unsigned int seen_count; + } *stack; + size_t stack_count, stack_capacity; struct vsir_block_list available_blocks; };
static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) { + struct vsir_block_list *loop = NULL; + struct vsir_cfg_node_sorter_stack_item *item; enum vkd3d_result ret;
+ if (sorter->cfg->loops_by_header[block->label - 1] != SIZE_MAX) + loop = &sorter->cfg->loops[sorter->cfg->loops_by_header[block->label - 1]]; + if ((ret = vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0) return ret;
+ if (!loop) + return VKD3D_OK; + + if (!vkd3d_array_reserve((void **)&sorter->stack, &sorter->stack_capacity, sorter->stack_count + 1, sizeof(*sorter->stack))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + item = &sorter->stack[sorter->stack_count++]; + item->loop = loop; + item->seen_count = 0; + return VKD3D_OK; }
@@ -3507,10 +3533,23 @@ static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cf * available list of all the blocks whose incoming degree has reached * zero. At each step we pick a block from the available list and * strip it away from the graph, updating the incoming degrees and - * available list. We prefer picking the most recently added block - * (i.e., from the end of the available list) because it will keep - * together related blocks in the order, which allow us to generate - * fewer control flow primitives. */ + * available list. + * + * In principle at each step we can pick whatever node we want from + * the available list, and will get a topological sort + * anyway. However, we use these two criteria to give to the computed + * order additional properties: + * + * 1. we keep track of which loops we're into, and pick blocks + * belonging to the current innermost loop, so that loops are kept + * contiguous in the order; this can always be done when the input + * CFG is reducible; + * + * 2. subject to the requirement above, we always pick the most + * recently added block to the available list, because this tends + * to keep related blocks and require fewer control flow + * primitives. + */ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) { struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; @@ -3566,12 +3605,51 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
while (sorter.available_blocks.count != 0) { - struct vsir_block *block = sorter.available_blocks.blocks[sorter.available_blocks.count - 1]; + struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL; + struct vsir_block *block; + size_t new_seen_count; + + if (sorter.stack_count != 0) + inner_stack_item = &sorter.stack[sorter.stack_count - 1]; + + for (i = sorter.available_blocks.count - 1; ; --i) + { + if (i == SIZE_MAX) + { + ERR("Couldn't find any viable next block, is the input CFG reducible?\n"); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + block = sorter.available_blocks.blocks[i]; + + if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block)) + break; + }
- vsir_block_list_remove_index(&sorter.available_blocks, sorter.available_blocks.count - 1); + vsir_block_list_remove_index(&sorter.available_blocks, i); if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) goto fail;
+ /* Close loops: since each loop is a strict subset of any + * outer loop, we just need to track how many blocks we've + * seen; when I close a loop I mark the same number of seen + * blocks for the next outer loop. */ + new_seen_count = 1; + while (sorter.stack_count != 0) + { + inner_stack_item = &sorter.stack[sorter.stack_count - 1]; + + inner_stack_item->seen_count += new_seen_count; + + assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); + if (inner_stack_item->seen_count != inner_stack_item->loop->count) + break; + + new_seen_count = inner_stack_item->loop->count; + --sorter.stack_count; + } + /* Remove (forward) edges and make new nodes available. */ for (i = 0; i < block->successors.count; ++i) { @@ -3601,7 +3679,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) goto fail; }
+ assert(sorter.stack_count == 0); + vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); vsir_block_list_cleanup(&sorter.available_blocks);
if (TRACE_ON()) @@ -3619,6 +3700,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
fail: vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); vsir_block_list_cleanup(&sorter.available_blocks);
return ret;
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 3c862f33e..05b40cafd 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3140,6 +3140,16 @@ struct vsir_cfg size_t *loops_by_header;
struct vsir_block_list order; + struct cfg_loop_interval + { + /* `begin' is the position of the first block of the loop in + * the topological sort; `end' is the position of the first + * block after the loop. In other words, `begin' is where a + * `continue' instruction would jump and `end' is where a + * `break' instruction would jump. */ + unsigned int begin, end; + } *loop_intervals; + size_t loop_interval_count, loop_interval_capacity; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3157,11 +3167,29 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) vkd3d_free(cfg->blocks); vkd3d_free(cfg->loops); vkd3d_free(cfg->loops_by_header); + vkd3d_free(cfg->loop_intervals);
if (TRACE_ON()) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); }
+static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, + unsigned int end) +{ + struct cfg_loop_interval *interval; + + if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, + cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + interval = &cfg->loop_intervals[cfg->loop_interval_count++]; + + interval->begin = begin; + interval->end = end; + + return VKD3D_OK; +} + static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) { return bitmap_is_set(b1->dominates, b2->label - 1); @@ -3496,6 +3524,7 @@ struct vsir_cfg_node_sorter { struct vsir_block_list *loop; unsigned int seen_count; + unsigned int open_time; } *stack; size_t stack_count, stack_capacity; struct vsir_block_list available_blocks; @@ -3522,6 +3551,7 @@ static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cf item = &sorter->stack[sorter->stack_count++]; item->loop = loop; item->seen_count = 0; + item->open_time = sorter->cfg->order.count;
return VKD3D_OK; } @@ -3646,6 +3676,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) if (inner_stack_item->seen_count != inner_stack_item->loop->count) break;
+ if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->open_time, + cfg->order.count)) < 0) + goto fail; + new_seen_count = inner_stack_item->loop->count; --sorter.stack_count; } @@ -3694,6 +3728,9 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); + + for (i = 0; i < cfg->loop_interval_count; ++i) + TRACE("Loop interval %u - %u\n", cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); }
return VKD3D_OK;
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 161 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 05b40cafd..375de3e24 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3065,7 +3065,7 @@ static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t id
struct vsir_block { - unsigned int label; + unsigned int label, order_pos; /* `begin' points to the instruction immediately following the * LABEL that introduces the block. `end' points to the terminator * instruction (either BRANCH or RET). They can coincide, meaning @@ -3148,6 +3148,27 @@ struct vsir_cfg * `continue' instruction would jump and `end' is where a * `break' instruction would jump. */ unsigned int begin, end; + /* Each loop interval can be natural or synthetic. Natural + * intervals are added to represent loops given by CFG back + * edges. Synthetic intervals do not correspond to loop into + * the input CFG, but are added to leverage their `break' + * instruction in order to execute forward edges. + * + * For a synthetic loop interval it's not really important + * which one is the `begin' block, since we don't need to + * execute `continue' for them. So we have some leeway for + * moving it provided that these conditions are met: 1. the + * interval must contain all `break' instructions that target + * it, which in practice means that `begin' can be moved + * forward and not backward; 2. intervals must retain the + * correct inclusion properties (for each pair of intervals, + * either one contains the other or they are disjoint). + * + * Subject to these conditions, we try to reuse the same loop + * as much as possible (if many forward edges target the same + * block), but we still try to keep `begin' as forward as + * possible, to keep the loop scope as small as possible. */ + bool synthetic; } *loop_intervals; size_t loop_interval_count, loop_interval_capacity; }; @@ -3174,7 +3195,7 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) }
static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, - unsigned int end) + unsigned int end, bool synthetic) { struct cfg_loop_interval *interval;
@@ -3186,6 +3207,7 @@ static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsign
interval->begin = begin; interval->end = end; + interval->synthetic = synthetic;
return VKD3D_OK; } @@ -3658,6 +3680,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) }
vsir_block_list_remove_index(&sorter.available_blocks, i); + block->order_pos = cfg->order.count; if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) goto fail;
@@ -3677,7 +3700,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) break;
if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->open_time, - cfg->order.count)) < 0) + cfg->order.count, false)) < 0) goto fail;
new_seen_count = inner_stack_item->loop->count; @@ -3728,9 +3751,6 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); - - for (i = 0; i < cfg->loop_interval_count; ++i) - TRACE("Loop interval %u - %u\n", cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); }
return VKD3D_OK; @@ -3743,6 +3763,129 @@ fail: return ret; }
+static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + size_t i, j, k; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *successor = block->successors.blocks[j]; + struct cfg_loop_interval *extend = NULL; + unsigned int begin; + enum + { + DO_NOTHING, + CREATE_NEW, + EXTEND, + } action = CREATE_NEW; + + /* We've already contructed loop intervals for the back + * edges, there's nothing more to do. */ + if (vsir_block_dominates(successor, block)) + continue; + + assert(block->order_pos < successor->order_pos); + + /* Jumping from a block to the following one is always + * possible, so nothing to do. */ + if (block->order_pos + 1 == successor->order_pos) + continue; + + /* Let's look for a loop interval that already breaks at + * `successor' and either contains or can be extended to + * contain `block'. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->end != successor->order_pos) + continue; + + if (interval->begin <= block->order_pos) + { + action = DO_NOTHING; + break; + } + + if (interval->synthetic) + { + action = EXTEND; + extend = interval; + break; + } + } + + if (action == DO_NOTHING) + continue; + + /* Ok, we have to decide where the new or replacing + * interval has to begin. These are the rules: 1. it must + * begin before `block'; 2. intervals must be correctly + * stacked (for any two intervals, either one is included + * in the other or they are disjoint); 3. the new interval + * should begin as late as possible, to limit control flow + * depth and extension. */ + begin = block->order_pos; + + /* Our candidate interval is always [begin, + * successor->order_pos), and we move `begin' backward + * until the candidate interval contains all the intervals + * whose endpoint lies in the candidate interval + * itself. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (begin < interval->end && interval->end < successor->order_pos) + begin = min(begin, interval->begin); + } + + /* New we have to care about the intervals whose begin + * point lies in the candidate interval. We cannot move + * the condidate interval endpoint, because it is + * important that the loop break target matches + * `successor'. So we have to move that interval's begin + * point to the begin point of the candidate interval, + * i.e. `begin'. But what if the interval we should extend + * backward is not synthetic? This cannot happen, + * fortunately, because it would mean that there is a jump + * entering a loop via a block which is not the loop + * header, so the CFG would not be reducible. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->begin < successor->order_pos && successor->order_pos < interval->end) + { + if (interval->synthetic) + interval->begin = min(begin, interval->begin); + assert(begin >= interval->begin); + } + } + + if (action == EXTEND) + extend->begin = begin; + else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) + return ret; + } + } + + if (TRACE_ON()) + for (i = 0; i < cfg->loop_interval_count; ++i) + TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural", + cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); + + return VKD3D_OK; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -3781,6 +3924,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, return result; }
+ if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = simple_structurizer_run(parser)) < 0) { vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 375de3e24..2b3d692da 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3763,6 +3763,20 @@ fail: return ret; }
+/* Sort loop intervals first by ascending begin time and then by + * descending end time, so that inner intervals appear after outer + * ones and disjoint intervals appear in their proper order. */ +static int compare_loop_intervals(const void *ptr1, const void *ptr2) +{ + const struct cfg_loop_interval *interval1 = ptr1; + const struct cfg_loop_interval *interval2 = ptr2; + + if (interval1->begin != interval2->begin) + return vkd3d_u32_compare(interval1->begin, interval2->begin); + + return -vkd3d_u32_compare(interval1->end, interval2->end); +} + static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -3878,6 +3892,8 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ } }
+ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); + if (TRACE_ON()) for (i = 0; i < cfg->loop_interval_count; ++i) TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural",
This merge request was approved by Giovanni Mascellani.