~~This applies on top of !672, the last three commits belong here.~~
Here we compute additional "synthetic" loops that will be used to implement non-trivial forward edges using (possibly conditional) `break` instructions.
-- v3: vkd3d-shader/ir: Sort loop intervals. vkd3d-shader/ir: Generate synthetic intervals for forward edges. vkd3d-shader/ir: Compute loop as intervals of the block order.
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 3c862f33e..05b40cafd 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3140,6 +3140,16 @@ struct vsir_cfg size_t *loops_by_header;
struct vsir_block_list order; + struct cfg_loop_interval + { + /* `begin' is the position of the first block of the loop in + * the topological sort; `end' is the position of the first + * block after the loop. In other words, `begin' is where a + * `continue' instruction would jump and `end' is where a + * `break' instruction would jump. */ + unsigned int begin, end; + } *loop_intervals; + size_t loop_interval_count, loop_interval_capacity; };
static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3157,11 +3167,29 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) vkd3d_free(cfg->blocks); vkd3d_free(cfg->loops); vkd3d_free(cfg->loops_by_header); + vkd3d_free(cfg->loop_intervals);
if (TRACE_ON()) vkd3d_string_buffer_cleanup(&cfg->debug_buffer); }
+static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, + unsigned int end) +{ + struct cfg_loop_interval *interval; + + if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, + cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + interval = &cfg->loop_intervals[cfg->loop_interval_count++]; + + interval->begin = begin; + interval->end = end; + + return VKD3D_OK; +} + static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) { return bitmap_is_set(b1->dominates, b2->label - 1); @@ -3496,6 +3524,7 @@ struct vsir_cfg_node_sorter { struct vsir_block_list *loop; unsigned int seen_count; + unsigned int open_time; } *stack; size_t stack_count, stack_capacity; struct vsir_block_list available_blocks; @@ -3522,6 +3551,7 @@ static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cf item = &sorter->stack[sorter->stack_count++]; item->loop = loop; item->seen_count = 0; + item->open_time = sorter->cfg->order.count;
return VKD3D_OK; } @@ -3646,6 +3676,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) if (inner_stack_item->seen_count != inner_stack_item->loop->count) break;
+ if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->open_time, + cfg->order.count)) < 0) + goto fail; + new_seen_count = inner_stack_item->loop->count; --sorter.stack_count; } @@ -3694,6 +3728,9 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); + + for (i = 0; i < cfg->loop_interval_count; ++i) + TRACE("Loop interval %u - %u\n", cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); }
return VKD3D_OK;
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 161 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 05b40cafd..51cbceab3 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3065,7 +3065,7 @@ static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t id
struct vsir_block { - unsigned int label; + unsigned int label, order_pos; /* `begin' points to the instruction immediately following the * LABEL that introduces the block. `end' points to the terminator * instruction (either BRANCH or RET). They can coincide, meaning @@ -3148,6 +3148,27 @@ struct vsir_cfg * `continue' instruction would jump and `end' is where a * `break' instruction would jump. */ unsigned int begin, end; + /* Each loop interval can be natural or synthetic. Natural + * intervals are added to represent loops given by CFG back + * edges. Synthetic intervals do not correspond to loop into + * the input CFG, but are added to leverage their `break' + * instruction in order to execute forward edges. + * + * For a synthetic loop interval it's not really important + * which one is the `begin' block, since we don't need to + * execute `continue' for them. So we have some leeway for + * moving it provided that these conditions are met: 1. the + * interval must contain all `break' instructions that target + * it, which in practice means that `begin' can be moved + * forward and not backward; 2. intervals must retain the + * correct inclusion properties (for each pair of intervals, + * either one contains the other or they are disjoint). + * + * Subject to these conditions, we try to reuse the same loop + * as much as possible (if many forward edges target the same + * block), but we still try to keep `begin' as forward as + * possible, to keep the loop scope as small as possible. */ + bool synthetic; } *loop_intervals; size_t loop_interval_count, loop_interval_capacity; }; @@ -3174,7 +3195,7 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) }
static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, - unsigned int end) + unsigned int end, bool synthetic) { struct cfg_loop_interval *interval;
@@ -3186,6 +3207,7 @@ static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsign
interval->begin = begin; interval->end = end; + interval->synthetic = synthetic;
return VKD3D_OK; } @@ -3658,6 +3680,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) }
vsir_block_list_remove_index(&sorter.available_blocks, i); + block->order_pos = cfg->order.count; if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) goto fail;
@@ -3677,7 +3700,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) break;
if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->open_time, - cfg->order.count)) < 0) + cfg->order.count, false)) < 0) goto fail;
new_seen_count = inner_stack_item->loop->count; @@ -3728,9 +3751,6 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); - - for (i = 0; i < cfg->loop_interval_count; ++i) - TRACE("Loop interval %u - %u\n", cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); }
return VKD3D_OK; @@ -3743,6 +3763,129 @@ fail: return ret; }
+static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + size_t i, j, k; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *successor = block->successors.blocks[j]; + struct cfg_loop_interval *extend = NULL; + unsigned int begin; + enum + { + ACTION_DO_NOTHING, + ACTION_CREATE_NEW, + ACTION_EXTEND, + } action = ACTION_CREATE_NEW; + + /* We've already contructed loop intervals for the back + * edges, there's nothing more to do. */ + if (vsir_block_dominates(successor, block)) + continue; + + assert(block->order_pos < successor->order_pos); + + /* Jumping from a block to the following one is always + * possible, so nothing to do. */ + if (block->order_pos + 1 == successor->order_pos) + continue; + + /* Let's look for a loop interval that already breaks at + * `successor' and either contains or can be extended to + * contain `block'. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->end != successor->order_pos) + continue; + + if (interval->begin <= block->order_pos) + { + action = ACTION_DO_NOTHING; + break; + } + + if (interval->synthetic) + { + action = ACTION_EXTEND; + extend = interval; + break; + } + } + + if (action == ACTION_DO_NOTHING) + continue; + + /* Ok, we have to decide where the new or replacing + * interval has to begin. These are the rules: 1. it must + * begin before `block'; 2. intervals must be correctly + * stacked (for any two intervals, either one is included + * in the other or they are disjoint); 3. the new interval + * should begin as late as possible, to limit control flow + * depth and extension. */ + begin = block->order_pos; + + /* Our candidate interval is always [begin, + * successor->order_pos), and we move `begin' backward + * until the candidate interval contains all the intervals + * whose endpoint lies in the candidate interval + * itself. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (begin < interval->end && interval->end < successor->order_pos) + begin = min(begin, interval->begin); + } + + /* New we have to care about the intervals whose begin + * point lies in the candidate interval. We cannot move + * the condidate interval endpoint, because it is + * important that the loop break target matches + * `successor'. So we have to move that interval's begin + * point to the begin point of the candidate interval, + * i.e. `begin'. But what if the interval we should extend + * backward is not synthetic? This cannot happen, + * fortunately, because it would mean that there is a jump + * entering a loop via a block which is not the loop + * header, so the CFG would not be reducible. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->begin < successor->order_pos && successor->order_pos < interval->end) + { + if (interval->synthetic) + interval->begin = min(begin, interval->begin); + assert(begin >= interval->begin); + } + } + + if (action == ACTION_EXTEND) + extend->begin = begin; + else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) + return ret; + } + } + + if (TRACE_ON()) + for (i = 0; i < cfg->loop_interval_count; ++i) + TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural", + cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); + + return VKD3D_OK; +} + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info) { @@ -3781,6 +3924,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, return result; }
+ if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + if ((result = simple_structurizer_run(parser)) < 0) { vsir_cfg_cleanup(&cfg);
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 51cbceab3..213fd5dcd 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3763,6 +3763,20 @@ fail: return ret; }
+/* Sort loop intervals first by ascending begin time and then by + * descending end time, so that inner intervals appear after outer + * ones and disjoint intervals appear in their proper order. */ +static int compare_loop_intervals(const void *ptr1, const void *ptr2) +{ + const struct cfg_loop_interval *interval1 = ptr1; + const struct cfg_loop_interval *interval2 = ptr2; + + if (interval1->begin != interval2->begin) + return vkd3d_u32_compare(interval1->begin, interval2->begin); + + return -vkd3d_u32_compare(interval1->end, interval2->end); +} + static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -3878,6 +3892,8 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ } }
+ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); + if (TRACE_ON()) for (i = 0; i < cfg->loop_interval_count; ++i) TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural",
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
size_t *loops_by_header; struct vsir_block_list order;
- struct cfg_loop_interval
- {
/* `begin' is the position of the first block of the loop in
* the topological sort; `end' is the position of the first
* block after the loop. In other words, `begin' is where a
* `continue' instruction would jump and `end' is where a
* `break' instruction would jump. */
unsigned int begin, end;
Is `begin` always the header block? If so, is `header` a better name? Similarly, if `end` is always the merge block, can we call it `merge`?
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
{ struct vsir_block_list *loop; unsigned int seen_count;
unsigned int open_time;
In the same vein as my earlier comment about header/merge blocks, is `header_pos` or `open_pos` a better name? Using the word `time` is a bit of a misdirection.
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
if (action == ACTION_DO_NOTHING)
continue;
/* Ok, we have to decide where the new or replacing
* interval has to begin. These are the rules: 1. it must
* begin before `block'; 2. intervals must be correctly
* stacked (for any two intervals, either one is included
* in the other or they are disjoint); 3. the new interval
* should begin as late as possible, to limit control flow
* depth and extension. */
begin = block->order_pos;
/* Our candidate interval is always [begin,
* successor->order_pos), and we move `begin' backward
* until the candidate interval contains all the intervals
But the earlier comment in `struct vsir_cfg` says, "which in practice means that `begin' can be moved forward and not backward." Also, there is an opening `[` closed with `)`.
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
/* Our candidate interval is always [begin,
* successor->order_pos), and we move `begin' backward
* until the candidate interval contains all the intervals
* whose endpoint lies in the candidate interval
* itself. */
for (k = 0; k < cfg->loop_interval_count; ++k)
{
struct cfg_loop_interval *interval = &cfg->loop_intervals[k];
if (begin < interval->end && interval->end < successor->order_pos)
begin = min(begin, interval->begin);
}
/* New we have to care about the intervals whose begin
* point lies in the candidate interval. We cannot move
* the condidate interval endpoint, because it is
Typo 'condidate'
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
* important that the loop break target matches
* `successor'. So we have to move that interval's begin
* point to the begin point of the candidate interval,
* i.e. `begin'. But what if the interval we should extend
* backward is not synthetic? This cannot happen,
* fortunately, because it would mean that there is a jump
* entering a loop via a block which is not the loop
* header, so the CFG would not be reducible. */
for (k = 0; k < cfg->loop_interval_count; ++k)
{
struct cfg_loop_interval *interval = &cfg->loop_intervals[k];
if (interval->begin < successor->order_pos && successor->order_pos < interval->end)
{
if (interval->synthetic)
interval->begin = min(begin, interval->begin);
`interval` is nested within an outer interval (`begin`, `successor->order_pos`), correct? I understand the value of moving the nested `begin` up as high as possible in case there are other branches it should contain, but are there other reasons for doing this? In particular, does the algorithm depend on it?
Conor McCarthy (@cmccarthy) commented about libs/vkd3d-shader/ir.c:
* `continue' instruction would jump and `end' is where a * `break' instruction would jump. */ unsigned int begin, end;
/* Each loop interval can be natural or synthetic. Natural
* intervals are added to represent loops given by CFG back
* edges. Synthetic intervals do not correspond to loop into
Should this say, "correspond to loops in"?