~~This applies on top of !672, the last three commits belong here.~~
Here we compute additional "synthetic" loops that will be used to implement non-trivial forward edges using (possibly conditional) `break` instructions.
-- v4: vkd3d-shader/ir: Sort loop intervals. vkd3d-shader/ir: Generate synthetic intervals for forward edges. vkd3d-shader/ir: Compute loop as intervals of the block order.
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 53 +++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 3c862f33e..d26341d54 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3140,6 +3140,16 @@ struct vsir_cfg size_t *loops_by_header;
struct vsir_block_list order; + struct cfg_loop_interval + { + /* `begin' is the position of the first block of the loop in + * the topological sort; `end' is the position of the first + * block after the loop. In other words, `begin' is where a + * `continue' instruction would jump and `end' is where a + * `break' instruction would jump. */ + unsigned int begin, end; + } *loop_intervals; + size_t loop_interval_count, loop_interval_capacity; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3157,11 +3167,29 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) vkd3d_free(cfg->blocks); vkd3d_free(cfg->loops); vkd3d_free(cfg->loops_by_header); + vkd3d_free(cfg->loop_intervals);
if (TRACE_ON()) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); }
+static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, + unsigned int end) +{ + struct cfg_loop_interval *interval; + + if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, + cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + interval = &cfg->loop_intervals[cfg->loop_interval_count++]; + + interval->begin = begin; + interval->end = end; + + return VKD3D_OK; +} + static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) { return bitmap_is_set(b1->dominates, b2->label - 1); @@ -3396,14 +3424,14 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) * block without passing through the header block) belong to the same * loop. * - * If the input CFG is reducible, each two loops are either disjoint - * or one is a strict subset of the other, provided that each block - * has at most one incoming back edge. If this condition does not - * hold, a synthetic block can be introduced as the only back edge - * block for the given header block, with all the previous back edge - * now being forward edges to the synthetic block. This is not - * currently implemented (but it is rarely found in practice - * anyway). */ + * If the input CFG is reducible its loops are properly nested (i.e., + * each two loops are either disjoint or one is contained in the + * other), provided that each block has at most one incoming back + * edge. If this condition does not hold, a synthetic block can be + * introduced as the only back edge block for the given header block, + * with all the previous back edge now being forward edges to the + * synthetic block. This is not currently implemented (but it is + * rarely found in practice anyway). */ static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, struct vsir_block *header) { @@ -3496,6 +3524,7 @@ struct vsir_cfg_node_sorter { struct vsir_block_list *loop; unsigned int seen_count; + unsigned int begin; } *stack; size_t stack_count, stack_capacity; struct vsir_block_list available_blocks; @@ -3522,6 +3551,7 @@ static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cf item = &sorter->stack[sorter->stack_count++]; item->loop = loop; item->seen_count = 0; + item->begin = sorter->cfg->order.count;
return VKD3D_OK; } @@ -3646,6 +3676,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) if (inner_stack_item->seen_count != inner_stack_item->loop->count) break;
+ if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin, + cfg->order.count)) < 0) + goto fail; + new_seen_count = inner_stack_item->loop->count; --sorter.stack_count; } @@ -3694,6 +3728,9 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); + + for (i = 0; i < cfg->loop_interval_count; ++i) + TRACE("Loop interval %u - %u\n", cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); }
return VKD3D_OK;
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 159 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index d26341d54..4997b475a 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3065,7 +3065,7 @@ static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t id
struct vsir_block { - unsigned int label; + unsigned int label, order_pos; /* `begin' points to the instruction immediately following the * LABEL that introduces the block. `end' points to the terminator * instruction (either BRANCH or RET). They can coincide, meaning @@ -3148,6 +3148,27 @@ struct vsir_cfg * `continue' instruction would jump and `end' is where a * `break' instruction would jump. */ unsigned int begin, end; + /* Each loop interval can be natural or synthetic. Natural + * intervals are added to represent loops given by CFG back + * edges. Synthetic intervals do not correspond to loops in + * the input CFG, but are added to leverage their `break' + * instruction in order to execute forward edges. + * + * For a synthetic loop interval it's not really important + * which one is the `begin' block, since we don't need to + * execute `continue' for them. So we have some leeway for + * moving it provided that these conditions are met: 1. the + * interval must contain all `break' instructions that target + * it, which in practice means that `begin' can be moved + * backward and not forward; 2. intervals must remain properly + * nested (for each pair of intervals, either one contains the + * other or they are disjoint). + * + * Subject to these conditions, we try to reuse the same loop + * as much as possible (if many forward edges target the same + * block), but we still try to keep `begin' as forward as + * possible, to keep the loop scope as small as possible. */ + bool synthetic; } *loop_intervals; size_t loop_interval_count, loop_interval_capacity; }; @@ -3174,7 +3195,7 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) }
static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, - unsigned int end) + unsigned int end, bool synthetic) { struct cfg_loop_interval *interval;
@@ -3186,6 +3207,7 @@ static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsign
interval->begin = begin; interval->end = end; + interval->synthetic = synthetic;
return VKD3D_OK; } @@ -3658,6 +3680,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) }
vsir_block_list_remove_index(&sorter.available_blocks, i); + block->order_pos = cfg->order.count; if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) goto fail;
@@ -3677,7 +3700,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) break;
if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin, - cfg->order.count)) < 0) + cfg->order.count, false)) < 0) goto fail;
new_seen_count = inner_stack_item->loop->count; @@ -3728,9 +3751,6 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); - - for (i = 0; i < cfg->loop_interval_count; ++i) - TRACE("Loop interval %u - %u\n", cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); }
return VKD3D_OK; @@ -3743,6 +3763,127 @@ fail: return ret; }
+static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + size_t i, j, k; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *successor = block->successors.blocks[j]; + struct cfg_loop_interval *extend = NULL; + unsigned int begin; + enum + { + ACTION_DO_NOTHING, + ACTION_CREATE_NEW, + ACTION_EXTEND, + } action = ACTION_CREATE_NEW; + + /* We've already contructed loop intervals for the back + * edges, there's nothing more to do. */ + if (vsir_block_dominates(successor, block)) + continue; + + assert(block->order_pos < successor->order_pos); + + /* Jumping from a block to the following one is always + * possible, so nothing to do. */ + if (block->order_pos + 1 == successor->order_pos) + continue; + + /* Let's look for a loop interval that already breaks at + * `successor' and either contains or can be extended to + * contain `block'. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->end != successor->order_pos) + continue; + + if (interval->begin <= block->order_pos) + { + action = ACTION_DO_NOTHING; + break; + } + + if (interval->synthetic) + { + action = ACTION_EXTEND; + extend = interval; + break; + } + } + + if (action == ACTION_DO_NOTHING) + continue; + + /* Ok, we have to decide where the new or replacing + * interval has to begin. These are the rules: 1. it must + * begin before `block'; 2. intervals must be properly + * nested; 3. the new interval should begin as late as + * possible, to limit control flow depth and extension. */ + begin = block->order_pos; + + /* Our candidate interval is always [begin, + * successor->order_pos), and we move `begin' backward + * until the candidate interval contains all the intervals + * whose endpoint lies in the candidate interval + * itself. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (begin < interval->end && interval->end < successor->order_pos) + begin = min(begin, interval->begin); + } + + /* New we have to care about the intervals whose begin + * point lies in the candidate interval. We cannot move + * the candidate interval endpoint, because it is + * important that the loop break target matches + * `successor'. So we have to move that interval's begin + * point to the begin point of the candidate interval, + * i.e. `begin'. But what if the interval we should extend + * backward is not synthetic? This cannot happen, + * fortunately, because it would mean that there is a jump + * entering a loop via a block which is not the loop + * header, so the CFG would not be reducible. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->begin < successor->order_pos && successor->order_pos < interval->end) + { + if (interval->synthetic) + interval->begin = min(begin, interval->begin); + assert(begin >= interval->begin); + } + } + + if (action == ACTION_EXTEND) + extend->begin = begin; + else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) + return ret; + } + } + + if (TRACE_ON()) + for (i = 0; i < cfg->loop_interval_count; ++i) + TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural", + cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); + + return VKD3D_OK; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -3781,6 +3922,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, return result; }
+ if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = simple_structurizer_run(parser)) < 0) { vsir_cfg_cleanup(&cfg);
On Tue Mar 12 04:10:55 2024 +0000, Conor McCarthy wrote:
But the earlier comment in `struct vsir_cfg` says, "which in practice means that `begin' can be moved forward and not backward." Also, there is an opening `[` closed with `)`.
The earlier comment is wrong, I fixed it. The interval can be made larger, not smaller, so `begin` can be brought backward and not forward.
On Tue Mar 12 04:10:54 2024 +0000, Conor McCarthy wrote:
Is `begin` always the header block? If so, is `header` a better name? Similarly, if `end` is always the merge block, can we call it `merge`?
I think it's true that `begin` always end up begin the header block and `end` ends up begin the merge block, but at this level I find it more useful to think these numbers as positions (or times) at which the given interval begins or ends. The algorithm introduced in this MR handles intervals intervals in what I would describe as a "geometric" way, caring very little about what they mean in terms of the SPIR-V program that will eventually be generated. The concepts of header and merge blocks are introduced by SPIR-V at a later stage and feel unnecessary to me here. So I wouldn't change the names here.
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 4997b475a..bf565c2bc 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3763,6 +3763,20 @@ fail: return ret; }
+/* Sort loop intervals first by ascending begin time and then by + * descending end time, so that inner intervals appear after outer + * ones and disjoint intervals appear in their proper order. */ +static int compare_loop_intervals(const void *ptr1, const void *ptr2) +{ + const struct cfg_loop_interval *interval1 = ptr1; + const struct cfg_loop_interval *interval2 = ptr2; + + if (interval1->begin != interval2->begin) + return vkd3d_u32_compare(interval1->begin, interval2->begin); + + return -vkd3d_u32_compare(interval1->end, interval2->end); +} + static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -3876,6 +3890,8 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ } }
+ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); + if (TRACE_ON()) for (i = 0; i < cfg->loop_interval_count; ++i) TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural",
On Tue Mar 12 04:10:54 2024 +0000, Conor McCarthy wrote:
In the same vein as my earlier comment about header/merge blocks, is `header_pos` or `open_pos` a better name? Using the word `time` is a bit of a misdirection.
I can give up the time interpretation. Thinking again I would call this `begin`, because that's what it eventually becomes; but `open_pos` works for me too.
On Tue Mar 12 04:10:56 2024 +0000, Conor McCarthy wrote:
`interval` is nested within an outer interval (`begin`, `successor->order_pos`), correct? I understand the value of moving the nested `begin` up as high as possible in case there are other branches it should contain, but are there other reasons for doing this? In particular, does the algorithm depend on it?
I am not I understand the question. The point of this and the previous loop is to make sure that intervals remain properly nested after `[begin, successor->order_pos)` is added. For the nesting property to be broken one of these two situations must happen: ``` existing interval: ---[------)------ candidate interval: -------[-----)--- ``` or ``` existing interval: -------[-----)--- candidate interval: ---[------)------ ```
Broadly speaking, the first loop cares about the first problematic situation and the second loop cares about the second one. In the first case we basically bring the candidate begin backward: ``` existing interval: ---[------)------ candidate interval: -------[-----)---
BECOMES
existing interval: ---[------)------ candidate interval: ---[---------)--- ``` so the candidate interval now includes the existing one and they are properly nested.
In the second case we need to modify the existing interval: ``` existing interval: -------[-----)--- candidate interval: ---[------)------
BECOMES
existing interval: ---[---------)--- candidate interval: ---[------)------ ``` and again they're properly nested. This is admittedly a bit more delicate, because extending the existing interval we might in turn create a bad situation with a third interval. However that cannot really happen, because if that were the case this third interval would already been not properly nested with either the candidate or the existing interval (I can go into detail, if that helps). Another potential problem might be that the existing interval is a natural one, so its begin point cannot be touched. But again this cannot happen (if the input CFG is reducible), because it would mean that there is a jump that lands inside it without going through the begin point (which is the header).
Hopefully this helps understanding these two loops. They are quite probably the most cryptic point in my structurizer, which is why I tried to be generous with comments. If you think this comment does a better job than the comments, I can use it to expand those.
On Tue Mar 12 12:49:25 2024 +0000, Giovanni Mascellani wrote:
I am not I understand the question. The point of this and the previous loop is to make sure that intervals remain properly nested after `[begin, successor->order_pos)` is added. For the nesting property to be broken one of these two situations must happen:
existing interval: ---[------)------ candidate interval: -------[-----)---
or
existing interval: -------[-----)--- candidate interval: ---[------)------
Broadly speaking, the first loop cares about the first problematic situation and the second loop cares about the second one. In the first case we basically bring the candidate begin backward:
existing interval: ---[------)------ candidate interval: -------[-----)--- BECOMES existing interval: ---[------)------ candidate interval: ---[---------)---
so the candidate interval now includes the existing one and they are properly nested. In the second case we need to modify the existing interval:
existing interval: -------[-----)--- candidate interval: ---[------)------ BECOMES existing interval: ---[---------)--- candidate interval: ---[------)------
and again they're properly nested. This is admittedly a bit more delicate, because extending the existing interval we might in turn create a bad situation with a third interval. However that cannot really happen, because if that were the case this third interval would already been not properly nested with either the candidate or the existing interval (I can go into detail, if that helps). Another potential problem might be that the existing interval is a natural one, so its begin point cannot be touched. But again this cannot happen (if the input CFG is reducible), because it would mean that there is a jump that lands inside it without going through the begin point (which is the header). Hopefully this helps understanding these two loops. They are quite probably the most cryptic point in my structurizer, which is why I tried to be generous with comments. If you think this comment does a better job than the comments, I can use it to expand those.
I understand now. I'm on the fence about whether we need something like the above in comments. It may be a bit too much, but does make it clearer.
This merge request was approved by Conor McCarthy.
On Tue Mar 12 13:46:23 2024 +0000, Conor McCarthy wrote:
I understand now. I'm on the fence about whether we need something like the above in comments. It may be a bit too much, but does make it clearer.
Yeah, I had more or less the same feeling while writing the comments.
This merge request was approved by Henri Verbeet.