From: Victor Chiletto <vchiletto@codeweavers.com>
Based on a patch by Nikolay Sivov.
Co-authored-by: Nikolay Sivov <nsivov@codeweavers.com> --- libs/vkd3d-shader/hlsl_codegen.c | 365 ++++++++++++++++++++++- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + tests/hlsl/for.shader_test | 4 +- tests/hlsl/function-return.shader_test | 4 +- tests/hlsl/loop.shader_test | 16 +- tests/hlsl/return.shader_test | 4 +- tests/hlsl/texture-load.shader_test | 4 +- 7 files changed, 372 insertions(+), 26 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 8a94b4b8a..b4cff480c 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2394,6 +2394,359 @@ static bool remove_trivial_conditional_branches(struct hlsl_ctx *ctx, struct hls return true; }
+/* Run the constant folding, copy propagation, swizzle simplification and
+ * trivial-branch removal passes over "body" until a full round makes no
+ * further progress. */
+static void transform_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
+{
+    bool progress;
+
+    do
+    {
+        progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL);
+        progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL);
+        progress |= hlsl_copy_propagation_execute(ctx, body);
+        progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
+        progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL);
+        progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL);
+    } while (progress);
+}
+
+/* Append "var = val" to "block" as a bool constant followed by a simple store.
+ *
+ * Returns false if either instruction could not be created. If the store
+ * fails, the constant has already been appended to "block"; the caller owns
+ * "block" and is responsible for cleaning it up. */
+static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
+        bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *value, *store;
+
+    if (!(value = hlsl_new_bool_constant(ctx, val, loc)))
+        return false;
+    hlsl_block_add_instr(block, value);
+
+    if (!(store = hlsl_new_simple_store(ctx, var, value)))
+        return false;
+    hlsl_block_add_instr(block, store);
+
+    return true;
+}
+
+static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
+        struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued);
+
+/* Inspect a single instruction while removing jumps from an unrolled body.
+ *
+ * - For an "if", recurse into the then block and, if nothing was replaced
+ *   there, into the else block.
+ * - For a "break" or "continue" jump, replace the jump with a store of "true"
+ *   to "loop_broken" or "loop_continued" respectively.
+ *
+ * Returns true if a jump was replaced (directly or in a nested "if"), in which
+ * case the caller must guard the instructions following "node". Other
+ * instruction types, including nested loops and switches, are not visited. */
+static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node,
+        struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
+{
+    switch (node->type)
+    {
+        case HLSL_IR_IF:
+        {
+            struct hlsl_ir_if *iff = hlsl_ir_if(node);
+
+            if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued))
+                return true;
+
+            if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued))
+                return true;
+
+            break;
+        }
+
+        case HLSL_IR_JUMP:
+        {
+            struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
+            struct hlsl_ir_var *var;
+            struct hlsl_block draft;
+
+            if (jump->type != HLSL_IR_JUMP_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK)
+                break;
+
+            hlsl_block_init(&draft);
+            var = (jump->type == HLSL_IR_JUMP_CONTINUE) ? loop_continued : loop_broken;
+
+            if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc))
+            {
+                /* Do not leak a constant that was appended before the store failed. */
+                hlsl_block_cleanup(&draft);
+                return false;
+            }
+
+            /* Splice the store in front of the jump, then drop the jump itself. */
+            list_move_before(&jump->node.entry, &draft.instrs);
+            list_remove(&jump->node.entry);
+            hlsl_free_instr(&jump->node);
+
+            return true;
+        }
+
+        default:
+            break;
+    }
+
+    return false;
+}
+
+/* Append "if (!var) { }" to "dst": a load of "var", its logical negation, and
+ * an "if" on that condition with an empty then block and no else block.
+ *
+ * Returns the new "if" so the caller can fill its then block, or NULL if an
+ * instruction could not be created (instructions appended before the failure
+ * remain in "dst"). */
+static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx,
+        struct hlsl_block *dst, struct vkd3d_shader_location *loc, struct hlsl_ir_var *var)
+{
+    struct hlsl_ir_node *cond, *iff;
+    struct hlsl_block then_block;
+    struct hlsl_ir_load *load;
+
+    hlsl_block_init(&then_block);
+
+    if (!(load = hlsl_new_var_load(ctx, var, loc)))
+        return NULL;
+    hlsl_block_add_instr(dst, &load->node);
+
+    if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc)))
+        return NULL;
+    hlsl_block_add_instr(dst, cond);
+
+    if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc)))
+        return NULL;
+    hlsl_block_add_instr(dst, iff);
+
+    return hlsl_ir_if(iff);
+}
+
+/* Replace the first break/continue found in "block" (searching nested "if"
+ * blocks as well) and wrap every instruction that followed it in
+ * "if (!loop_broken) { if (!loop_continued) { ... } }".
+ *
+ * Returns true if a jump was replaced; the caller is expected to call again
+ * until false is returned. */
+static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
+        struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
+{
+    struct hlsl_ir_node *node, *next;
+
+    LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry)
+    {
+        if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued))
+            continue;
+
+        /* Only guard the tail if there is anything after the visited node. */
+        if (&next->entry != &block->instrs)
+        {
+            struct hlsl_ir_if *broken_check, *continued_check;
+            struct hlsl_block draft;
+
+            hlsl_block_init(&draft);
+
+            /* Both checks can fail to allocate; never dereference a NULL result.
+             * The jump has already been replaced, so still report progress.
+             * NOTE(review): the failure is presumably recorded on ctx by the
+             * hlsl_new_*() helpers so that compilation is aborted; confirm. */
+            if (!(broken_check = loop_unrolling_generate_var_check(ctx, &draft, &next->loc, loop_broken))
+                    || !(continued_check = loop_unrolling_generate_var_check(ctx,
+                    &broken_check->then_block, &next->loc, loop_continued)))
+            {
+                hlsl_block_cleanup(&draft);
+                return true;
+            }
+
+            /* Insert the checks before "next", then move "next" through the end
+             * of the block into the innermost then block. */
+            list_move_before(&next->entry, &draft.instrs);
+            list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs));
+        }
+
+        return true;
+    }
+
+    return false;
+}
+
+/* Replace every break/continue reachable from "block" through nested "if"
+ * blocks with stores to "loop_broken" / "loop_continued". */
+static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block,
+        struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
+{
+    while (loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued))
+    {
+        /* Each successful call replaces exactly one jump. */
+    }
+}
+
+/* Simplify one unrolled iteration and report whether the loop is known to
+ * have terminated.
+ *
+ * "current_state" is re-initialised from "previous_state" on every round, and
+ * the folding/propagation passes are run on "block" until they stop making
+ * progress. "*index" is the instruction index to start numbering "block" from
+ * and is updated to the value returned by the last index_instructions() call.
+ *
+ * Returns true if copy propagation resolves "loop_broken" to a constant whose
+ * first component is non-zero. */
+static bool loop_unrolling_simplify_and_check_broken(struct hlsl_ctx *ctx, struct hlsl_block *block,
+        struct hlsl_ir_var *loop_broken, struct copy_propagation_state *previous_state,
+        struct copy_propagation_state *current_state, unsigned int *index)
+{
+    struct copy_propagation_value *loop_broken_value;
+    unsigned int current_index;
+    bool progress;
+
+    do
+    {
+        copy_propagation_state_destroy(current_state);
+        copy_propagation_state_init(ctx, current_state, previous_state);
+
+        progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL);
+        progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL);
+        current_index = index_instructions(block, *index);
+        progress |= copy_propagation_transform_block(ctx, block, current_state);
+        progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL);
+        progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL);
+        progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL);
+    } while (progress);
+
+    *index = current_index;
+
+    if ((loop_broken_value = copy_propagation_get_value(current_state, loop_broken, 0, (unsigned int) -1))
+            && loop_broken_value->node->type == HLSL_IR_CONSTANT)
+    {
+        struct hlsl_ir_constant *condition = hlsl_ir_constant(loop_broken_value->node);
+
+        if (condition->value.u[0].u)
+            return true;
+    }
+
+    return false;
+}
+
+#define LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS 1024
+
+/* Unroll "loop", which must be an instruction of "block".
+ *
+ * The instructions preceding the loop are moved to a draft block, followed by
+ * "loop_broken = false" and up to max_iterations copies of the loop body, each
+ * nested in "if (!loop_broken)". After every copy the draft is simplified;
+ * unrolling stops as soon as "loop_broken" is known to be true. On success the
+ * loop instruction is removed and the draft is spliced in at the head of
+ * "block".
+ *
+ * Returns false if an instruction could not be created or if the default
+ * iteration limit was reached. In that case the instructions that preceded the
+ * loop have been removed from "block", so the caller must discard "block". */
+static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop)
+{
+    /* NOTE(review): one state per iteration lives on the stack; consider a heap
+     * allocation if struct copy_propagation_state grows. */
+    struct copy_propagation_state partial_states[LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS + 1] = { 0 };
+    struct hlsl_ir_var *loop_broken = NULL, *loop_continued = NULL;
+    unsigned int max_iterations, index, i;
+    struct hlsl_ir_if *target_if = NULL;
+    struct hlsl_block draft, tmp_dst;
+    struct list *last_before_loop;
+    bool ret = false;
+
+    max_iterations = LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS;
+    if (loop->unroll_limit)
+        max_iterations = min(loop->unroll_limit, max_iterations);
+
+    hlsl_block_init(&draft);
+    hlsl_block_init(&tmp_dst);
+
+    /* list_prev() yields NULL when the loop is the first instruction of the
+     * block; there is nothing to move then, and passing NULL on as the slice
+     * tail would be dereferenced. */
+    if ((last_before_loop = list_prev(&block->instrs, &loop->node.entry)))
+        list_move_slice_tail(&draft.instrs, list_head(&block->instrs), last_before_loop);
+
+    if (!(loop_broken = hlsl_new_synthetic_var(ctx, "loop_broken",
+            hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
+        goto done;
+
+    if (!(loop_continued = hlsl_new_synthetic_var(ctx, "loop_continued",
+            hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
+        goto done;
+
+    if (!loop_unrolling_generate_const_bool_store(ctx, loop_broken, false, &draft, &loop->node.loc))
+        goto done;
+
+    if (!(target_if = loop_unrolling_generate_var_check(ctx, &draft, &loop->node.loc, loop_broken)))
+        goto done;
+
+    copy_propagation_state_init(ctx, &partial_states[0], NULL);
+    index = index_instructions(&draft, 2);
+    copy_propagation_transform_block(ctx, &draft, &partial_states[0]);
+
+    for (i = 0; i < max_iterations; ++i)
+    {
+        if (!loop_unrolling_generate_const_bool_store(ctx, loop_continued, false,
+                &target_if->then_block, &loop->node.loc))
+            goto done;
+
+        if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body))
+            goto done;
+        hlsl_block_add_block(&target_if->then_block, &tmp_dst);
+
+        loop_unrolling_remove_jumps(ctx, &target_if->then_block, loop_broken, loop_continued);
+
+        if (loop_unrolling_simplify_and_check_broken(ctx, &target_if->then_block, loop_broken,
+                &partial_states[i], &partial_states[i + 1], &index))
+            break;
+
+        /* Open the guard for the next iteration and start it with the loop's
+         * iteration block. */
+        if (!(target_if = loop_unrolling_generate_var_check(ctx, &draft, &loop->node.loc, loop_broken)))
+            goto done;
+
+        if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter))
+            goto done;
+
+        hlsl_block_add_block(&target_if->then_block, &tmp_dst);
+    }
+
+    /* Native gives up on unrolling entirely after 1024 iterations.
+     * It also will not insert a loop if there are iterations left
+     * after max_iterations, e.g. [unroll(4)] for (i = 0; i < 8; ++i). */
+    if (i == LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS)
+    {
+        hlsl_warning(ctx, &loop->node.loc, VKD3D_SHADER_WARNING_HLSL_UNABLE_TO_UNROLL, "Unable to unroll loop, maximum iterations reached (%u).", LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS);
+        goto done;
+    }
+
+    list_remove(&loop->node.entry);
+    hlsl_free_instr(&loop->node);
+
+    /* Leaves "draft" empty, so the cleanup below has nothing left to free. */
+    list_move_head(&block->instrs, &draft.instrs);
+    ret = true;
+
+done:
+    hlsl_block_cleanup(&draft);
+    hlsl_block_cleanup(&tmp_dst);
+
+    for (i = 0; i < LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS + 1; ++i)
+        copy_propagation_state_destroy(&partial_states[i]);
+
+    /* The synthetic variables are deliberately not freed here. The previous
+     * code only ever called hlsl_free_var() with a NULL pointer (the checks
+     * were inverted).
+     * NOTE(review): hlsl_new_synthetic_var() presumably registers the variable
+     * with a scope owned by ctx, in which case freeing it here would leave a
+     * dangling entry; confirm against hlsl.c. */
+
+    return ret;
+}
+
+/* Depth-first search of "block" for a loop whose "unroll" flag is set.
+ *
+ * Loop bodies are searched before the loop itself is considered, so the
+ * innermost unrollable loop is returned first. Both branches of an "if" and
+ * every case body of a "switch" are searched as well. On success
+ * "*containing_block" is set to the block that directly contains the returned
+ * loop; it is left untouched when NULL is returned. */
+static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx,
+        struct hlsl_block *block, struct hlsl_block **containing_block)
+{
+    struct hlsl_ir_node *instr, *next;
+
+    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
+    {
+        switch (instr->type)
+        {
+            case HLSL_IR_LOOP:
+            {
+                struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
+                struct hlsl_ir_loop *nested_loop;
+
+                if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block)))
+                    return nested_loop;
+
+                if (loop->unroll)
+                {
+                    *containing_block = block;
+                    return loop;
+                }
+
+                break;
+            }
+
+            case HLSL_IR_IF:
+            {
+                struct hlsl_ir_if *iff = hlsl_ir_if(instr);
+                struct hlsl_ir_loop *loop;
+
+                if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block)))
+                    return loop;
+
+                if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block)))
+                    return loop;
+
+                break;
+            }
+
+            case HLSL_IR_SWITCH:
+            {
+                struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
+                struct hlsl_ir_switch_case *c;
+                struct hlsl_ir_loop *loop;
+
+                LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
+                {
+                    if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block)))
+                        return loop;
+                }
+
+                break;
+            }
+
+            default:
+                break;
+        }
+    }
+
+    return NULL;
+}
+
+/* Unroll every loop in "block" that has its "unroll" flag set.
+ *
+ * Each attempt works on a clone of the whole block, so a failed attempt leaves
+ * "block" untouched. On failure the flag is cleared on the original loop so it
+ * is not tried again; on success the clone replaces the contents of "block".
+ * The function returns once no unrollable loop is left, or if the block could
+ * not be cloned. */
+static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block)
+{
+    while (true)
+    {
+        struct hlsl_block clone, *containing_block;
+        struct hlsl_ir_loop *loop, *cloned_loop;
+
+        if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block)))
+            return;
+
+        if (!hlsl_clone_block(ctx, &clone, block))
+            return;
+
+        /* The same search on the clone is expected to find the copy of "loop";
+         * "containing_block" now points into the clone. */
+        cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block);
+        assert(cloned_loop);
+
+        if (!loop_unrolling_unroll_loop(ctx, containing_block, cloned_loop))
+        {
+            hlsl_block_cleanup(&clone);
+            loop->unroll = false;
+            continue;
+        }
+
+        hlsl_block_cleanup(block);
+        hlsl_block_init(block);
+        hlsl_block_add_block(block, &clone);
+    }
+}
static void transform_insert_continue_iter_blocks(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_block *iter) { @@ -5562,17 +5915,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_int_abs, body); lower_ir(ctx, lower_float_modulus, body); hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); - } - while (progress); + transform_unroll_loops(ctx, body); transform_insert_continue_iter_blocks(ctx, body, NULL); + transform_run_const_passes(ctx, body); remove_unreachable_code(ctx, body); hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL);
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b07a7bff7..10864e162 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -157,6 +157,7 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, + VKD3D_SHADER_WARNING_HLSL_UNABLE_TO_UNROLL = 5306,
VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000,
diff --git a/tests/hlsl/for.shader_test b/tests/hlsl/for.shader_test index 7ce6c8213..b3fbd76d7 100644 --- a/tests/hlsl/for.shader_test +++ b/tests/hlsl/for.shader_test @@ -63,7 +63,7 @@ probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0) [require] % Reset requirements
-[pixel shader todo(sm<4)] +[pixel shader] float4 main(float tex : texcoord) : sv_target { int i; @@ -76,7 +76,7 @@ float4 main(float tex : texcoord) : sv_target }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (10.0, 45.0, 0.0, 0.0)
[pixel shader fail(sm<6)] diff --git a/tests/hlsl/function-return.shader_test b/tests/hlsl/function-return.shader_test index 3c085a578..9d754a0e2 100644 --- a/tests/hlsl/function-return.shader_test +++ b/tests/hlsl/function-return.shader_test @@ -143,7 +143,7 @@ uniform 0 float 0.9 todo(sm<4 | glsl) draw quad probe all rgba (1.0, 0.9, 1.0, 0.6) 1
-[pixel shader todo(sm<4)] +[pixel shader] float func(out float o) { o = 0.1; @@ -181,7 +181,7 @@ float4 main() : sv_target }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.4, 0.3, 0.3, 0.9) 1
[pixel shader todo(sm<4)] diff --git a/tests/hlsl/loop.shader_test b/tests/hlsl/loop.shader_test index 2de10d986..47fece6ac 100644 --- a/tests/hlsl/loop.shader_test +++ b/tests/hlsl/loop.shader_test @@ -1,6 +1,6 @@ % TODO: dxcompiler emits no loops for any of these test shaders.
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -18,11 +18,11 @@ float4 main() : sv_target
[test] uniform 0 float 5.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (50.0, 50.0, 50.0, 50.0)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -41,10 +41,10 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (20.0, 20.0, 20.0, 20.0)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -70,10 +70,10 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (409.1, 409.1, 409.1, 409.1)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -100,7 +100,7 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (410.1, 410.1, 410.1, 410.1)
% loop attribute by itself diff --git a/tests/hlsl/return.shader_test b/tests/hlsl/return.shader_test index 2195f749a..24c157afd 100644 --- a/tests/hlsl/return.shader_test +++ b/tests/hlsl/return.shader_test @@ -124,7 +124,7 @@ uniform 0 float 0.9 todo(sm<4 | glsl) draw quad probe all rgba (0.4, 0.5, 0.6, 0.7) 1
-[pixel shader todo(sm<4)] +[pixel shader] void main(out float4 ret : sv_target) { ret = float4(0.1, 0.2, 0.3, 0.4); @@ -138,7 +138,7 @@ void main(out float4 ret : sv_target) }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.2, 0.4, 0.6, 0.8)
[pixel shader todo(sm<4)] diff --git a/tests/hlsl/texture-load.shader_test b/tests/hlsl/texture-load.shader_test index 3858f7ca6..bf63ec307 100644 --- a/tests/hlsl/texture-load.shader_test +++ b/tests/hlsl/texture-load.shader_test @@ -124,7 +124,7 @@ float4 main(float4 pos : sv_position) : sv_target shader model >= 4.0 shader model < 4.1
-[pixel shader todo] +[pixel shader] Texture2DMS<float4, 1> t;
float4 main(float4 pos : sv_position) : sv_target @@ -139,7 +139,7 @@ float4 main(float4 pos : sv_position) : sv_target }
[test] -todo draw quad +todo(glsl) draw quad probe (0, 0) rgba (0.1, 0.2, 0.3, 0.4) probe (1, 0) rgba (0.5, 0.7, 0.6, 0.8) probe (0, 1) rgba (0.6, 0.5, 0.2, 0.1)