From: Victor Chiletto <vchiletto@codeweavers.com>
Based on a patch by Nikolay Sivov.
Co-authored-by: Nikolay Sivov <nsivov@codeweavers.com> --- libs/vkd3d-shader/hlsl_codegen.c | 205 +++++++++++++++++++++-- libs/vkd3d-shader/vkd3d_shader_private.h | 2 + tests/hlsl/for.shader_test | 4 +- tests/hlsl/function-return.shader_test | 4 +- tests/hlsl/loop.shader_test | 16 +- tests/hlsl/return.shader_test | 4 +- tests/hlsl/texture-load.shader_test | 4 +- 7 files changed, 213 insertions(+), 26 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index e0812627d..4639e2798 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -5387,6 +5387,199 @@ void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *b } }
+static void transform_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) +{ /* Runs the simplification passes below on body again and again until one full round reports no progress. */ + bool progress; + + do + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); + } while (progress); +} + /* Callback passed to hlsl_transform_ir(): when node is a break or continue jump it is stored through context, which is used as a struct hlsl_ir_jump **, and false is returned; every other node returns true. NOTE(review): how hlsl_transform_ir() interprets the return value, and whether it descends into nested loops, is not visible here; the callback itself does not distinguish jumps that belong to a nested loop, and a later match overwrites an earlier one. */ +static bool loop_unrolling_find_jump(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) +{ + struct hlsl_ir_jump **out = context; + + if (node->type == HLSL_IR_JUMP) + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + + if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) + { + *out = jump; + return false; + } + } + + return true; +} + /* Default cap on the number of loop body copies; unrolling fails when the copy count reaches this value. */ +#define LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS 1024 + /* Tries to replace loop, which is looked up as a direct entry of block->instrs, with repeated copies of its body. Returns true on success: the loop node is removed and freed and the unrolled instructions are moved to the head of block. Returns false on failure, after emitting an error if the loop is marked HLSL_IR_LOOP_FORCE_UNROLL. NOTE(review): on failure the instructions that preceded the loop have already been moved into draft and are handed to hlsl_block_cleanup(&draft), so block is left modified; the visible caller only passes a clone that it discards. */ +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop) +{ + struct hlsl_block draft, tmp_dst; + unsigned int max_iterations, i; + /* A non-zero unroll_limit can only lower the cap; it never raises it above the default. */ + max_iterations = LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS; + if (loop->unroll_limit) + max_iterations = min(loop->unroll_limit, max_iterations); + /* Move everything that precedes the loop in block into draft, so that the passes run on draft below also see those instructions. NOTE(review): if the loop is the first instruction of block, list_prev() presumably yields NULL; confirm that list_move_slice_tail() copes with that. */ + hlsl_block_init(&draft); + hlsl_block_init(&tmp_dst); + list_move_slice_tail(&draft.instrs, list_head(&block->instrs), list_prev(&block->instrs, &loop->node.entry)); + + for (i = 0; i < max_iterations; ++i) + { + struct hlsl_ir_jump *jump = NULL; + /* Append a fresh copy of the loop body to the draft. */ + if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) + goto fail; + hlsl_block_add_block(&draft, &tmp_dst); + /* Simplify the whole draft, then look for a break or continue that is still present. */ + transform_run_const_passes(ctx, &draft); + remove_unreachable_code(ctx, &draft); + hlsl_transform_ir(ctx, loop_unrolling_find_jump, &draft, &jump); + + if (jump) + { + enum hlsl_ir_jump_type type = jump->type; + /* Give up unless list_next() reports nothing after the jump in draft.instrs. NOTE(review): presumably this is only the case when the jump is the last top-level instruction of the draft, so a jump nested in a branch is rejected here as well; confirm against list_next(). */ + if (list_next(&draft.instrs, &jump->node.entry)) + { + 
hlsl_warning(ctx, &jump->node.loc, VKD3D_SHADER_WARNING_HLSL_UNABLE_TO_UNROLL, "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported"); + goto fail; + } + /* The trailing jump is dropped; a break ends the unrolling, a continue proceeds to the next copy. */ + list_remove(&jump->node.entry); + hlsl_free_instr(&jump->node); + + if (type == HLSL_IR_JUMP_BREAK) + break; + } + } + + /* Native gives up on unrolling entirely after 1024 iterations. + * It also will not insert a loop if there are iterations left + * after max_iterations, e.g. [unroll(4)] for (i = 0; i < 8; ++i). */ + if (i == LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS) /* i only gets here when no break ended the loop within the default cap; running out of a smaller unroll_limit is accepted as is. */ + { + hlsl_warning(ctx, &loop->node.loc, VKD3D_SHADER_WARNING_HLSL_UNABLE_TO_UNROLL, "Unable to unroll loop, maximum iterations reached (%u).", LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS); + goto fail; + } + /* Success: drop the loop node and put the unrolled instructions back at the head of block. */ + list_remove(&loop->node.entry); + hlsl_free_instr(&loop->node); + + list_move_head(&block->instrs, &draft.instrs); + hlsl_block_cleanup(&tmp_dst); + hlsl_block_cleanup(&draft); + + return true; + +fail: /* Reached for clone failures as well as for unsupported jumps and the iteration cap; only a forced unroll turns the failure into an error. */ + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, "Failed to unroll loop marked as forced unroll."); + + hlsl_block_cleanup(&draft); + hlsl_block_cleanup(&tmp_dst); + + return false; +} + /* Searches block, in instruction order, for a loop whose unroll_type is HLSL_IR_LOOP_UNROLL or HLSL_IR_LOOP_FORCE_UNROLL. The body of every loop, both branches of every if and every switch case are searched recursively, and a loop's body is searched before the loop itself is considered, so a nested candidate is returned ahead of the loop enclosing it. On success *containing_block is set to the block that directly holds the returned loop; returns NULL when there is no candidate. */ +static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_block **containing_block) +{ + struct hlsl_ir_node *instr, *next; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *nested_loop; + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + /* Inner loops first. */ + if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) + return nested_loop; + + if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + { + *containing_block = block; + return loop; + } + + break; + } + case HLSL_IR_IF: + { + struct hlsl_ir_loop 
*loop; + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) + return loop; + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) + return loop; + + break; + } + case HLSL_IR_SWITCH: + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + struct hlsl_ir_loop *loop; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) + return loop; + } + + break; + } + default: + break; + } + } + + return NULL; +} + /* Unrolls the candidate loops of block one at a time until the search finds none. Each attempt works on a full clone of block: on success the clone's contents replace those of block; on failure the clone is discarded and the original loop is marked HLSL_IR_LOOP_FORCE_LOOP, which keeps the search from returning it again. Returns without reporting anything if cloning block fails. */ +static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + while (true) + { + struct hlsl_block clone, *containing_block; + struct hlsl_ir_loop *loop, *cloned_loop; + + if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) + return; + + if (!hlsl_clone_block(ctx, &clone, block)) + return; + /* Repeat the search on the clone; it is expected to find the counterpart of loop, and it re-targets containing_block at a block inside the clone. */ + cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); + assert(cloned_loop); + + if (!loop_unrolling_unroll_loop(ctx, containing_block, cloned_loop)) + { + hlsl_block_cleanup(&clone); + loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + continue; + } + /* Swap the successfully unrolled clone in for the original contents. */ + hlsl_block_cleanup(block); + hlsl_block_init(block); + hlsl_block_add_block(block, &clone); + } +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -5487,16 +5680,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_float_modulus, body); hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= 
hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); - } - while (progress); + transform_unroll_loops(ctx, body); + transform_run_const_passes(ctx, body); remove_unreachable_code(ctx, body); hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL);
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b07a7bff7..ce78d85f9 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -150,6 +150,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, + VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5031,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -157,6 +158,7 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, + VKD3D_SHADER_WARNING_HLSL_UNABLE_TO_UNROLL = 5306,
VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000,
diff --git a/tests/hlsl/for.shader_test b/tests/hlsl/for.shader_test index 7ce6c8213..b3fbd76d7 100644 --- a/tests/hlsl/for.shader_test +++ b/tests/hlsl/for.shader_test @@ -63,7 +63,7 @@ probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0) [require] % Reset requirements
-[pixel shader todo(sm<4)] +[pixel shader] float4 main(float tex : texcoord) : sv_target { int i; @@ -76,7 +76,7 @@ float4 main(float tex : texcoord) : sv_target }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (10.0, 45.0, 0.0, 0.0)
[pixel shader fail(sm<6)] diff --git a/tests/hlsl/function-return.shader_test b/tests/hlsl/function-return.shader_test index 3c085a578..9d754a0e2 100644 --- a/tests/hlsl/function-return.shader_test +++ b/tests/hlsl/function-return.shader_test @@ -143,7 +143,7 @@ uniform 0 float 0.9 todo(sm<4 | glsl) draw quad probe all rgba (1.0, 0.9, 1.0, 0.6) 1
-[pixel shader todo(sm<4)] +[pixel shader] float func(out float o) { o = 0.1; @@ -181,7 +181,7 @@ float4 main() : sv_target }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.4, 0.3, 0.3, 0.9) 1
[pixel shader todo(sm<4)] diff --git a/tests/hlsl/loop.shader_test b/tests/hlsl/loop.shader_test index 2de10d986..47fece6ac 100644 --- a/tests/hlsl/loop.shader_test +++ b/tests/hlsl/loop.shader_test @@ -1,6 +1,6 @@ % TODO: dxcompiler emits no loops for any of these test shaders.
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -18,11 +18,11 @@ float4 main() : sv_target
[test] uniform 0 float 5.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (50.0, 50.0, 50.0, 50.0)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -41,10 +41,10 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (20.0, 20.0, 20.0, 20.0)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -70,10 +70,10 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (409.1, 409.1, 409.1, 409.1)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -100,7 +100,7 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (410.1, 410.1, 410.1, 410.1)
% loop attribute by itself diff --git a/tests/hlsl/return.shader_test b/tests/hlsl/return.shader_test index 2195f749a..24c157afd 100644 --- a/tests/hlsl/return.shader_test +++ b/tests/hlsl/return.shader_test @@ -124,7 +124,7 @@ uniform 0 float 0.9 todo(sm<4 | glsl) draw quad probe all rgba (0.4, 0.5, 0.6, 0.7) 1
-[pixel shader todo(sm<4)] +[pixel shader] void main(out float4 ret : sv_target) { ret = float4(0.1, 0.2, 0.3, 0.4); @@ -138,7 +138,7 @@ void main(out float4 ret : sv_target) }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.2, 0.4, 0.6, 0.8)
[pixel shader todo(sm<4)] diff --git a/tests/hlsl/texture-load.shader_test b/tests/hlsl/texture-load.shader_test index 3858f7ca6..bf63ec307 100644 --- a/tests/hlsl/texture-load.shader_test +++ b/tests/hlsl/texture-load.shader_test @@ -124,7 +124,7 @@ float4 main(float4 pos : sv_position) : sv_target shader model >= 4.0 shader model < 4.1
-[pixel shader todo] +[pixel shader] Texture2DMS<float4, 1> t;
float4 main(float4 pos : sv_position) : sv_target @@ -139,7 +139,7 @@ float4 main(float4 pos : sv_position) : sv_target }
[test] -todo draw quad +todo(glsl) draw quad probe (0, 0) rgba (0.1, 0.2, 0.3, 0.4) probe (1, 0) rgba (0.5, 0.7, 0.6, 0.8) probe (0, 1) rgba (0.6, 0.5, 0.2, 0.1)