From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 610d907d9..b0b3499c2 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -4268,6 +4268,17 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg --cfg->loop_intervals[else_target].target_count; }
+ /* If a branch becomes empty, make it the else branch, so we save a block. */ + if (selection->u.selection.if_body.count == 0) + { + struct vsir_cfg_structure_list tmp; + + selection->u.selection.invert_condition = !selection->u.selection.invert_condition; + tmp = selection->u.selection.if_body; + selection->u.selection.if_body = selection->u.selection.else_body; + selection->u.selection.else_body = tmp; + } + return VKD3D_OK; }
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 54 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index b0b3499c2..77ad28236 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -2986,6 +2986,8 @@ struct vsir_cfg_structure { struct vsir_cfg_structure_list body; unsigned idx; + bool needs_trampoline; + struct vsir_cfg_structure *outer_loop; } loop; struct vsir_cfg_structure_selection { @@ -3257,7 +3259,8 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct
vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body);
- TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); + TRACE("%s} # %u%s\n", cfg->debug_buffer.buffer, structure->u.loop.idx, + structure->u.loop.needs_trampoline ? ", tramp" : ""); break;
case STRUCTURE_TYPE_SELECTION: @@ -4503,6 +4506,51 @@ static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structu } }
+/* Trampolines are code gadgets used to emulate multilevel jumps (which are not natively supported + * by SPIR-V). A trampoline is inserted just after a loop and checks whether control has reached the + * intended site (i.e., we just jumped out of the target block) or if other levels of jumping are + * needed. For each jump a trampoline is required for all the loops between the jump itself and the + * target loop, excluding the target loop itself. + * + * This pass walks `list' recursively. `loop' is the innermost loop structure enclosing `list', or + * NULL when `list' is not inside any loop (the top-level call passes NULL). `cfg' is only forwarded + * to the recursive calls. For every loop encountered the enclosing loop is recorded in + * `outer_loop'; for every break or continue jump, `needs_trampoline' is set on each loop from the + * innermost enclosing one outwards, stopping before the loop whose index equals the jump target. + * Other jump types are ignored. */ +static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *loop) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + break; + + case STRUCTURE_TYPE_LOOP: + structure->u.loop.outer_loop = loop; + vsir_cfg_mark_trampolines(cfg, &structure->u.loop.body, structure); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_mark_trampolines(cfg, &structure->u.selection.if_body, loop); + vsir_cfg_mark_trampolines(cfg, &structure->u.selection.else_body, loop); + break; + + case STRUCTURE_TYPE_JUMP: + { + struct vsir_cfg_structure *l; + if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) + break; /* Follow the outer_loop chain; the walk ends at the target loop or when no enclosing loop is left. */ + for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) + { + assert(l->type == STRUCTURE_TYPE_LOOP); + l->u.loop.needs_trampoline = true; + } + break; + } + } + } +} + static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -4511,6 +4559,8 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg)
ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program);
+ vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL); + if (TRACE_ON()) vsir_cfg_dump_structured_program(cfg);
@@ -4559,7 +4609,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg,
/* Add a trampoline to implement multilevel jumping depending on the stored * jump_target value. */ - if (loop_idx != UINT_MAX) + if (loop->needs_trampoline) { /* If the multilevel jump is a `continue' and the target is the loop we're inside * right now, then we can finally do the `continue'. */
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 53 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 77ad28236..97dc17de5 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3010,6 +3010,7 @@ struct vsir_cfg_structure unsigned int target; struct vkd3d_shader_src_param *condition; bool invert_condition; + bool needs_launcher; } jump; } u; }; @@ -3304,8 +3305,9 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct vkd3d_unreachable(); }
- TRACE("%s%s%s %u\n", cfg->debug_buffer.buffer, type_str, - structure->u.jump.condition ? "c" : "", structure->u.jump.target); + TRACE("%s%s%s %u%s\n", cfg->debug_buffer.buffer, type_str, + structure->u.jump.condition ? "c" : "", structure->u.jump.target, + structure->u.jump.needs_launcher ? " # launch" : ""); break; }
@@ -4551,6 +4553,45 @@ static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_stru } }
+/* Launchers are the counterpart of trampolines. A launcher is inserted just before a jump, and + * writes the target of the jump to a well-known variable. Trampolines will then read that + * variable to decide how to redirect the jump to its intended target. A launcher is needed each + * time the innermost loop containing the jump itself has a trampoline (independently of whether the + * jump is targeting that loop or not). + * + * This pass walks `list' recursively. `loop' is the innermost loop structure enclosing `list', or + * NULL when `list' is not inside any loop (the top-level call passes NULL). `cfg' is only forwarded + * to the recursive calls. Every break or continue jump is asserted to be inside a loop, and gets + * `needs_launcher' set when that loop has `needs_trampoline' set; the flag is never cleared here. + * Other jump types are ignored. */ +static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, + struct vsir_cfg_structure *loop) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + break; + + case STRUCTURE_TYPE_LOOP: + vsir_cfg_mark_launchers(cfg, &structure->u.loop.body, structure); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_mark_launchers(cfg, &structure->u.selection.if_body, loop); + vsir_cfg_mark_launchers(cfg, &structure->u.selection.else_body, loop); + break; + + case STRUCTURE_TYPE_JUMP: + if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) + break; + assert(loop && loop->type == STRUCTURE_TYPE_LOOP); + if (loop->u.loop.needs_trampoline) + structure->u.jump.needs_launcher = true; + break; + } + } +} + static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) { enum vkd3d_result ret; @@ -4559,7 +4600,13 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg)
ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program);
+ /* Trampolines and launchers cannot be marked with the same pass, + * because a jump might have to be marked as launcher even when it + * targets its innermost loop, if other jumps in the same loop + * need a trampoline anyway. So launchers can be discovered only + * once all the trampolines are known. */ vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL); + vsir_cfg_mark_launchers(cfg, &cfg->structured_program, NULL);
if (TRACE_ON()) vsir_cfg_dump_structured_program(cfg); @@ -4746,7 +4793,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) return VKD3D_ERROR_OUT_OF_MEMORY;
- if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) + if (jump->needs_launcher) { if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], &no_loc, VKD3DSIH_MOV, 1, 1))
From: Conor McCarthy cmccarthy@codeweavers.com
--- Makefile.am | 2 + tests/hlsl/wave-ops-float.shader_test | 399 ++++++++++++++++++++++++++ tests/hlsl/wave-ops-uint.shader_test | 267 +++++++++++++++++ 3 files changed, 668 insertions(+) create mode 100644 tests/hlsl/wave-ops-float.shader_test create mode 100644 tests/hlsl/wave-ops-uint.shader_test
diff --git a/Makefile.am b/Makefile.am index f823cbc85..28f076270 100644 --- a/Makefile.am +++ b/Makefile.am @@ -225,6 +225,8 @@ vkd3d_shader_tests = \ tests/hlsl/vector-indexing-uniform.shader_test \ tests/hlsl/vector-indexing.shader_test \ tests/hlsl/vertex-shader-ops.shader_test \ + tests/hlsl/wave-ops-float.shader_test \ + tests/hlsl/wave-ops-uint.shader_test \ tests/hlsl/writemask-assignop-0.shader_test \ tests/hlsl/writemask-assignop-1.shader_test \ tests/hlsl/writemask-assignop-2.shader_test \ diff --git a/tests/hlsl/wave-ops-float.shader_test b/tests/hlsl/wave-ops-float.shader_test new file mode 100644 index 000000000..d4c771d32 --- /dev/null +++ b/tests/hlsl/wave-ops-float.shader_test @@ -0,0 +1,399 @@ +[require] +shader model >= 6.0 + +[uav 0] +format r32g32b32a32 float +size (buffer, 4) + +3.5 1.0 4.0 2.5 +3.5 1.0 4.0 2.5 +3.5 1.5 4.0 2.5 +3.5 1.0 4.5 2.5 + +[uav 1] +format r32g32b32a32 uint +size (buffer, 8) + +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<uint4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + bool4 b = WaveActiveAllEqual(u0[id]); + u1[id] = uint4(b.x, b.y, b.z, b.w); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgbaui (1, 0, 0, 1) +probe uav 1 (1) rgbaui (1, 0, 0, 1) +probe uav 1 (2) rgbaui (1, 0, 0, 1) +probe uav 1 (3) rgbaui (1, 0, 0, 1) + + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<uint4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + bool any = u0[id].y == 1.5f; + bool all = u0[id].x == 3.5f; + bool none = u0[id].z == 3.0f; + u1[id] = uint4(WaveActiveAnyTrue(any), WaveActiveAnyTrue(all), WaveActiveAnyTrue(none), 0); + u1[4 + id] = uint4(WaveActiveAllTrue(any), WaveActiveAllTrue(all), WaveActiveAllTrue(none), 0); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgbaui (1, 1, 0, 0) +probe uav 1 (1) rgbaui (1, 1, 0, 0) +probe uav 1 (2) rgbaui (1, 1, 0, 0) +probe uav 1 (3) rgbaui (1, 1, 0, 0) 
+probe uav 1 (4) rgbaui (0, 1, 0, 0) +probe uav 1 (5) rgbaui (0, 1, 0, 0) +probe uav 1 (6) rgbaui (0, 1, 0, 0) +probe uav 1 (7) rgbaui (0, 1, 0, 0) + + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<uint4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + bool b = u0[id].y < 1.5; + u1[id] = WaveActiveBallot(b); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgbaui (0xb, 0, 0, 0) + + +[uav 1] +format r32g32b32a32 float +size (buffer, 8) + +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + float4 f = u0[id] + u0[id ^ 1]; + u1[id] = WaveReadLaneFirst(f); + u1[4 + id] = WaveReadLaneAt(f, 3); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (7.0, 2.0, 8.0, 5.0) +probe uav 1 (1) rgba (7.0, 2.0, 8.0, 5.0) +probe uav 1 (2) rgba (7.0, 2.0, 8.0, 5.0) +probe uav 1 (3) rgba (7.0, 2.0, 8.0, 5.0) +probe uav 1 (4) rgba (7.0, 2.5, 8.5, 5.0) +probe uav 1 (5) rgba (7.0, 2.5, 8.5, 5.0) +probe uav 1 (6) rgba (7.0, 2.5, 8.5, 5.0) +probe uav 1 (7) rgba (7.0, 2.5, 8.5, 5.0) + + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveActiveSum(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (14.0, 4.5, 16.5, 10.0) +probe uav 1 (1) rgba (14.0, 4.5, 16.5, 10.0) +probe uav 1 (2) rgba (14.0, 4.5, 16.5, 10.0) +probe uav 1 (3) rgba (14.0, 4.5, 16.5, 10.0) + + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveActiveProduct(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (150.0625, 1.5, 288.0, 39.0625) +probe uav 1 (1) rgba (150.0625, 1.5, 288.0, 39.0625) +probe uav 1 (2) rgba (150.0625, 1.5, 288.0, 39.0625) +probe uav 1 (3) rgba (150.0625, 1.5, 288.0, 39.0625) + + +[compute shader] +RWBuffer<float4> u0; 
+RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveActiveMin(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (3.5, 1.0, 4.0, 2.5) +probe uav 1 (1) rgba (3.5, 1.0, 4.0, 2.5) +probe uav 1 (2) rgba (3.5, 1.0, 4.0, 2.5) +probe uav 1 (3) rgba (3.5, 1.0, 4.0, 2.5) + + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveActiveMax(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (3.5, 1.5, 4.5, 2.5) +probe uav 1 (1) rgba (3.5, 1.5, 4.5, 2.5) +probe uav 1 (2) rgba (3.5, 1.5, 4.5, 2.5) +probe uav 1 (3) rgba (3.5, 1.5, 4.5, 2.5) + + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WavePrefixSum(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (0.0, 0.0, 0.0, 0.0) +probe uav 1 (1) rgba (3.5, 1.0, 4.0, 2.5) +probe uav 1 (2) rgba (7.0, 2.0, 8.0, 5.0) +probe uav 1 (3) rgba (10.5, 3.5, 12.0, 7.5) + + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WavePrefixProduct(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (1.0, 1.0, 1.0, 1.0) +probe uav 1 (1) rgba (3.5, 1.0, 4.0, 2.5) +probe uav 1 (2) rgba (12.25, 1.0, 16.0, 6.25) +probe uav 1 (3) rgba (42.875, 1.5, 64.0, 15.625) + + +[uav 0] +format r32g32b32a32 float +size (buffer, 4) + +0.25 0.50 0.75 1.00 +0.50 0.25 1.00 0.75 +0.75 1.00 0.50 0.25 +1.00 0.75 0.25 0.50 + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + float4 f = u0[WaveGetLaneIndex() % 4]; + u1[WaveGetLaneIndex() % 4] = QuadReadAcrossX(f); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (0.5, 0.25, 1.0, 0.75) +probe uav 1 (1) rgba (0.25, 0.5, 0.75, 1.0) +probe uav 1 (2) rgba (1.0, 0.75, 0.25, 0.5) +probe 
uav 1 (3) rgba (0.75, 1.0, 0.5, 0.25) + + +[compute shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + float4 f = u0[WaveGetLaneIndex() % 4]; + // Constant lane id. + u1[WaveGetLaneIndex() % 4] = QuadReadLaneAt(f, 2); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rgba (0.75, 1.0, 0.5, 0.25) +probe uav 1 (1) rgba (0.75, 1.0, 0.5, 0.25) +probe uav 1 (2) rgba (0.75, 1.0, 0.5, 0.25) +probe uav 1 (3) rgba (0.75, 1.0, 0.5, 0.25) + + +[pixel shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + +float4 main(float4 pos : SV_Position) : SV_Target +{ + float4 f = u0[WaveGetLaneIndex() % 4]; + u1[WaveGetLaneIndex() % 4] = QuadReadAcrossX(f); + return f; +} + +[test] +todo draw quad +probe rtv 0 (0, 0) rgba (0.25, 0.5, 0.75, 1.0) +probe rtv 0 (1, 0) rgba (0.5, 0.25, 1.0, 0.75) +probe rtv 0 (0, 1) rgba (0.75, 1.0, 0.5, 0.25) +probe rtv 0 (1, 1) rgba (1.0, 0.75, 0.25, 0.5) +probe uav 1 (0) rgba (0.5, 0.25, 1.0, 0.75) +probe uav 1 (1) rgba (0.25, 0.5, 0.75, 1.0) +probe uav 1 (2) rgba (1.0, 0.75, 0.25, 0.5) +probe uav 1 (3) rgba (0.75, 1.0, 0.5, 0.25) + + +[pixel shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + +float4 main(float4 pos : SV_Position) : SV_Target +{ + float4 f = u0[WaveGetLaneIndex() % 4]; + u1[WaveGetLaneIndex() % 4] = QuadReadAcrossY(f); + return f; +} + +[test] +todo draw quad +probe uav 1 (0) rgba (0.75, 1.0, 0.5, 0.25) +probe uav 1 (1) rgba (1.0, 0.75, 0.25, 0.5) +probe uav 1 (2) rgba (0.25, 0.5, 0.75, 1.0) +probe uav 1 (3) rgba (0.5, 0.25, 1.0, 0.75) + + +[pixel shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + +float4 main(float4 pos : SV_Position) : SV_Target +{ + float4 f = u0[WaveGetLaneIndex() % 4]; + u1[WaveGetLaneIndex() % 4] = QuadReadAcrossDiagonal(f); + return f; +} + +[test] +todo draw quad +probe uav 1 (1) rgba (0.75, 1.0, 0.5, 0.25) +probe uav 1 (0) rgba (1.0, 0.75, 0.25, 0.5) +probe uav 1 (3) rgba (0.25, 0.5, 0.75, 1.0) +probe uav 1 (2) rgba (0.5, 0.25, 1.0, 0.75) + 
+ +[pixel shader] +RWBuffer<float4> u0; +RWBuffer<float4> u1; + +float4 main(float4 pos : SV_Position) : SV_Target +{ + float4 f = u0[WaveGetLaneIndex() % 4]; + // Constant lane id. + u1[WaveGetLaneIndex() % 4] = QuadReadLaneAt(f, 1); + return f; +} + +[test] +todo draw quad +probe uav 1 (0) rgba (0.5, 0.25, 1.0, 0.75) +probe uav 1 (1) rgba (0.5, 0.25, 1.0, 0.75) +probe uav 1 (2) rgba (0.5, 0.25, 1.0, 0.75) +probe uav 1 (3) rgba (0.5, 0.25, 1.0, 0.75) + + +[pixel shader] +uniform uint i; + +RWBuffer<float4> u0; +RWBuffer<float4> u1; + +float4 main(float4 pos : SV_Position) : SV_Target +{ + float4 f = u0[WaveGetLaneIndex() % 4]; + // Uniform lane id. + u1[WaveGetLaneIndex() % 4] = QuadReadLaneAt(f, i); + return f; +} + +[test] +uniform 0 uint 0 +todo draw quad +probe uav 1 (0) rgba (0.25, 0.5, 0.75, 1.0) +probe uav 1 (1) rgba (0.25, 0.5, 0.75, 1.0) +probe uav 1 (2) rgba (0.25, 0.5, 0.75, 1.0) +probe uav 1 (3) rgba (0.25, 0.5, 0.75, 1.0) +uniform 0 uint 1 +todo draw quad +probe uav 1 (0) rgba (0.5, 0.25, 1.0, 0.75) +probe uav 1 (1) rgba (0.5, 0.25, 1.0, 0.75) +probe uav 1 (2) rgba (0.5, 0.25, 1.0, 0.75) +probe uav 1 (3) rgba (0.5, 0.25, 1.0, 0.75) +uniform 0 uint 2 +todo draw quad +probe uav 1 (0) rgba (0.75, 1.0, 0.5, 0.25) +probe uav 1 (1) rgba (0.75, 1.0, 0.5, 0.25) +probe uav 1 (2) rgba (0.75, 1.0, 0.5, 0.25) +probe uav 1 (3) rgba (0.75, 1.0, 0.5, 0.25) +uniform 0 uint 3 +todo draw quad +probe uav 1 (0) rgba (1.0, 0.75, 0.25, 0.5) +probe uav 1 (1) rgba (1.0, 0.75, 0.25, 0.5) +probe uav 1 (2) rgba (1.0, 0.75, 0.25, 0.5) +probe uav 1 (3) rgba (1.0, 0.75, 0.25, 0.5) diff --git a/tests/hlsl/wave-ops-uint.shader_test b/tests/hlsl/wave-ops-uint.shader_test new file mode 100644 index 000000000..60519ae38 --- /dev/null +++ b/tests/hlsl/wave-ops-uint.shader_test @@ -0,0 +1,267 @@ +[require] +shader model >= 6.0 + +[uav 0] +format r32 uint +size (buffer, 4) + +8 15 8 10 + +[uav 1] +format r32 uint +size (buffer, 8) + +0 0 0 0 0 0 0 0 + +[compute shader] +RWBuffer<uint> u1 
: register(u1); + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveIsFirstLane(); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (1) +probe uav 1 (1) rui (0) +probe uav 1 (2) rui (0) +probe uav 1 (3) rui (0) + + +[compute shader] +RWBuffer<uint> u1 : register(u1); + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveGetLaneIndex(); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (0) +probe uav 1 (1) rui (1) +probe uav 1 (2) rui (2) +probe uav 1 (3) rui (3) + + +[compute shader] +RWBuffer<uint> u1 : register(u1); + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + // Between 4 and 128, and includes inactive and/or helper lanes. + uint count = WaveGetLaneCount(); + u1[id] = count >= 4 && count <= 128; +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (1) +probe uav 1 (1) rui (1) +probe uav 1 (2) rui (1) +probe uav 1 (3) rui (1) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + bool b = u0[id] == 8; + u1[id] = WaveActiveBallot(b).x; +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (5) +probe uav 1 (1) rui (5) +probe uav 1 (2) rui (5) +probe uav 1 (3) rui (5) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + uint i = u0[id] + u0[id ^ 1]; + u1[id] = WaveReadLaneFirst(i); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (23) +probe uav 1 (1) rui (23) +probe uav 1 (2) rui (23) +probe uav 1 (3) rui (23) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + uint i = u0[id] + u0[id ^ 1]; + u1[id] = WaveReadLaneAt(i, 3); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (18) +probe uav 1 (1) rui (18) +probe uav 1 (2) rui (18) +probe uav 1 (3) rui (18) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] 
+void main(uint id : SV_GroupIndex) +{ + bool b = u0[id] == 8; + u1[id] = WaveActiveCountBits(b); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (2) +probe uav 1 (1) rui (2) +probe uav 1 (2) rui (2) +probe uav 1 (3) rui (2) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + bool b = u0[id] == 8; + u1[id] = WavePrefixCountBits(b); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (0) +probe uav 1 (1) rui (1) +probe uav 1 (2) rui (1) +probe uav 1 (3) rui (2) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveActiveSum(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (41) +probe uav 1 (1) rui (41) +probe uav 1 (2) rui (41) +probe uav 1 (3) rui (41) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveActiveBitAnd(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (8) +probe uav 1 (1) rui (8) +probe uav 1 (2) rui (8) +probe uav 1 (3) rui (8) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveActiveBitOr(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (15) +probe uav 1 (1) rui (15) +probe uav 1 (2) rui (15) +probe uav 1 (3) rui (15) + + +[compute shader] +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(4, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + u1[id] = WaveActiveBitXor(u0[id]); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (5) +probe uav 1 (1) rui (5) +probe uav 1 (2) rui (5) +probe uav 1 (3) rui (5) + + +[compute shader] +uniform uint4 u; + +RWBuffer<uint> u0; +RWBuffer<uint> u1; + + [numthreads(8, 1, 1)] +void main(uint id : SV_GroupIndex) +{ + uint table[] = {u.x, u.y, u.z, u.w}; + uint i; + + // Value depends on control flow. 
+ if (id & 1) + i = table[id / 2u]; + else + i = u0[id / 2u]; + + u1[id] = WaveActiveSum(i); +} + +[test] +uniform 0 uint4 1 5 3 9 +todo dispatch 8 1 1 +probe uav 1 (0) rui (59) +probe uav 1 (1) rui (59) +probe uav 1 (2) rui (59) +probe uav 1 (3) rui (59) +probe uav 1 (4) rui (59) +probe uav 1 (5) rui (59) +probe uav 1 (6) rui (59) +probe uav 1 (7) rui (59)