From: Francisco Casas fcasas@codeweavers.com
Otherwise we end up with ABS and NEG on bool types. --- libs/vkd3d-shader/hlsl_codegen.c | 11 ++++- .../hlsl/arithmetic-float-uniform.shader_test | 16 +++---- tests/hlsl/float-comparison.shader_test | 4 +- tests/hlsl/fmod.shader_test | 12 ++--- tests/hlsl/inverse-trig.shader_test | 44 +++++++++---------- tests/hlsl/lit.shader_test | 12 ++--- tests/hlsl/ternary.shader_test | 18 ++++---- tests/hlsl/vertex-shader-ops.shader_test | 6 +-- 8 files changed, 65 insertions(+), 58 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5c09ce04f..e6490265d 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2955,7 +2955,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; - struct hlsl_ir_node *zero, *cond, *first, *second; + struct hlsl_ir_node *zero, *cond, *first, *second, *float_cond; struct hlsl_constant_value zero_value = { 0 }; struct hlsl_ir_expr *expr; struct hlsl_type *type; @@ -2979,9 +2979,16 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
if (ctx->profile->major_version < 4) { + struct hlsl_type *float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); struct hlsl_ir_node *abs, *neg;
- if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) + memset(operands, 0, sizeof(operands)); + operands[0] = cond; + if (!(float_cond = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, float_cond); + + if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, float_cond, &instr->loc))) return false; hlsl_block_add_instr(block, abs);
diff --git a/tests/hlsl/arithmetic-float-uniform.shader_test b/tests/hlsl/arithmetic-float-uniform.shader_test index 8bc3992e7..61957f2bb 100644 --- a/tests/hlsl/arithmetic-float-uniform.shader_test +++ b/tests/hlsl/arithmetic-float-uniform.shader_test @@ -13,7 +13,7 @@ uniform 0 float4 5.0 15.0 0.0 0.0 todo(glsl) draw quad probe all rgba (20.0, -10.0, 75.0, 0.33333333) 1
-[pixel shader todo(sm<4)] +[pixel shader] uniform float2 a;
float4 main() : SV_TARGET @@ -25,10 +25,10 @@ float4 main() : SV_TARGET
[test] uniform 0 float4 5.0 15.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (5.0, 5.0, -5.0, 3.0) 1
-[pixel shader todo(sm<4)] +[pixel shader] uniform float2 a;
float4 main() : SV_TARGET @@ -40,10 +40,10 @@ float4 main() : SV_TARGET
[test] uniform 0 float4 42.0 5.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (2.0, -2.0, 2.0, -2.0) 16
-[pixel shader todo(sm<4)] +[pixel shader] uniform float2 a;
float4 main() : SV_TARGET @@ -55,10 +55,10 @@ float4 main() : SV_TARGET
[test] uniform 0 float4 45.0 5.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0)
-[pixel shader todo(sm<4)] +[pixel shader] float4 x, y;
float4 main() : sv_target @@ -69,7 +69,7 @@ float4 main() : sv_target [test] uniform 0 float4 5.0 -42.1 4.0 45.0 uniform 4 float4 15.0 -5.0 4.1 5.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (5.0, -2.1, 4.0, 0.0) 6
[require] diff --git a/tests/hlsl/float-comparison.shader_test b/tests/hlsl/float-comparison.shader_test index 84c09c129..56ce46f36 100644 --- a/tests/hlsl/float-comparison.shader_test +++ b/tests/hlsl/float-comparison.shader_test @@ -13,7 +13,7 @@ todo(glsl) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 f;
float4 main() : sv_target @@ -55,7 +55,7 @@ float4 main() : sv_target
[test] uniform 0 float4 0.0 1.5 1.5 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad % SM1-3 apparently treats '0/0' as zero. if(sm<4) todo probe all rgba (1010101.0, 11001100.0, 1101001.0, 11.0) % SM4-5 optimises away the 'not' by inverting the condition, even though this is invalid for NaN. diff --git a/tests/hlsl/fmod.shader_test b/tests/hlsl/fmod.shader_test index ccb7b99e7..40dc66e8c 100644 --- a/tests/hlsl/fmod.shader_test +++ b/tests/hlsl/fmod.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo(sm<4)] +[pixel shader] uniform float4 u;
float4 main() : sv_target @@ -8,13 +8,13 @@ float4 main() : sv_target
[test] uniform 0 float4 -0.5 6.5 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-0.5, 0.0, 0.0, 0.0) 4 uniform 0 float4 1.1 0.3 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.2, 0.0, 0.0, 0.0) 4
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 u;
float4 main() : sv_target @@ -24,8 +24,8 @@ float4 main() : sv_target
[test] uniform 0 float4 -0.5 6.5 2.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-0.5, 0.5, 0.0, 0.0) 4 uniform 0 float4 1.1 0.3 3.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.1, 0.3, 0.0, 0.0) 4 diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test index 31af0ceef..62d79e9ff 100644 --- a/tests/hlsl/inverse-trig.shader_test +++ b/tests/hlsl/inverse-trig.shader_test @@ -92,7 +92,7 @@ todo(glsl) draw quad probe all rgba (31416.0, 0.0, 0.0, 0.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 a;
float4 main() : sv_target @@ -102,26 +102,26 @@ float4 main() : sv_target
[test] uniform 0 float4 -1.0 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-0.785409629, 0.0, 0.0, 0.0) 512
uniform 0 float4 -0.5 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-0.4636476, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.5 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.4636476, 0.0, 0.0, 0.0) 256
uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.785409629, 0.0, 0.0, 0.0) 512
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 a;
float4 main() : sv_target @@ -133,64 +133,64 @@ float4 main() : sv_target [test] % Non-degenerate cases uniform 0 float4 1.0 1.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.785385, 0.0, 0.0, 0.0) 512
uniform 0 float4 5.0 -5.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (2.356194, 0.0, 0.0, 0.0) 256
uniform 0 float4 -3.0 -3.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-2.356194, 0.0, 0.0, 0.0) 256
uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 -1.0 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 1.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 -1.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256
% Degenerate cases uniform 0 float4 0.00001 0.00002 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.463647, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.00001 -0.00002 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (2.677945, 0.0, 0.0, 0.0) 256
uniform 0 float4 -0.00001 100000.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-0.000000000099986595, 0.0, 0.0, 0.0) 2048
uniform 0 float4 10000000.0 0.00000001 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
% Negative zero behavior should be to treat it the % same as normal zero. uniform 0 float4 1000000000.0 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 1000000000.0 -0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 -1.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256
uniform 0 float4 -0.0 -1.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256 diff --git a/tests/hlsl/lit.shader_test b/tests/hlsl/lit.shader_test index efb249dba..ce68d6ea9 100644 --- a/tests/hlsl/lit.shader_test +++ b/tests/hlsl/lit.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo(sm<4)] +[pixel shader] uniform float4 u;
float4 main() : sv_target @@ -8,20 +8,20 @@ float4 main() : sv_target
[test] uniform 0 float4 -0.1 10.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 0.0, 0.0, 1.0)
[test] uniform 0 float4 1.2 -0.1 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.2, 0.0, 1.0)
[test] uniform 0 float4 1.2 2.0 3.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.2, 8.0, 1.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 u;
float4 main() : sv_target @@ -31,7 +31,7 @@ float4 main() : sv_target
[test] uniform 0 float4 1.2 2.0 3.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (2.0, 2.4, 16.0, 2.0)
[pixel shader fail] diff --git a/tests/hlsl/ternary.shader_test b/tests/hlsl/ternary.shader_test index c075b1e5a..91802afd4 100644 --- a/tests/hlsl/ternary.shader_test +++ b/tests/hlsl/ternary.shader_test @@ -3,7 +3,7 @@ shader model < 6.0
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 x;
float4 main() : sv_target @@ -13,14 +13,14 @@ float4 main() : sv_target
[test] uniform 0 float4 2.0 3.0 4.0 5.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (2.0, 3.0, 4.0, 5.0) uniform 0 float4 0.0 10.0 11.0 12.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-1.0, 9.0, 10.0, 11.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 x;
float4 main() : sv_target @@ -35,11 +35,11 @@ float4 main() : sv_target
[test] uniform 0 float4 1.1 3.0 4.0 5.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.1, 2.0, 0.0, 0.0)
-[pixel shader todo(sm<4)] +[pixel shader] float4 f;
float4 main() : sv_target @@ -51,7 +51,7 @@ float4 main() : sv_target
[test] uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.5, 0.6, 0.7, 0.0)
@@ -246,7 +246,7 @@ todo(glsl) draw quad probe all rgba (3.0, 3.0, 3.0, 3.0)
-[pixel shader todo(sm<4)] +[pixel shader]
uniform float cond; uniform float4 a, b; @@ -260,7 +260,7 @@ float4 main() : sv_target uniform 0 float4 1.0 0.0 0.0 0.0 uniform 4 float4 1.0 2.0 3.0 4.0 uniform 8 float4 5.0 6.0 7.0 8.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 2.0, 3.0, 4.0)
diff --git a/tests/hlsl/vertex-shader-ops.shader_test b/tests/hlsl/vertex-shader-ops.shader_test index ee2a72f02..ea2a3df81 100644 --- a/tests/hlsl/vertex-shader-ops.shader_test +++ b/tests/hlsl/vertex-shader-ops.shader_test @@ -88,7 +88,7 @@ probe all rgba (1.0, 1.0, 1.0, 1.0) % The ternary operator works differently in sm6. See sm6-ternary.shader_test. shader model < 6.0
-[vertex shader todo(sm<4)] +[vertex shader] int a, b, c;
void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos : sv_position) @@ -103,11 +103,11 @@ if(sm<4) uniform 0 float 0 if(sm<4) uniform 4 float 100 if(sm<4) uniform 8 float 200 if(sm>=4) uniform 0 int4 0 100 200 0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.2, 0.2, 0.2, 0.2) if(sm<4) uniform 0 float -4 if(sm<4) uniform 4 float 100 if(sm<4) uniform 8 float 200 if(sm>=4) uniform 0 int4 -4 100 200 0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.1, 0.1, 0.1, 0.1)
From: Francisco Casas fcasas@codeweavers.com
--- tests/shader_runner_gl.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/tests/shader_runner_gl.c b/tests/shader_runner_gl.c index 3c2a41965..cbcfd95bb 100644 --- a/tests/shader_runner_gl.c +++ b/tests/shader_runner_gl.c @@ -1043,6 +1043,7 @@ static bool gl_runner_draw(struct shader_runner *r,
signature_element = vkd3d_shader_find_signature_element(&vs_input_signature, element->name, element->index, 0); + ok(signature_element, "Cannot find signature element %s%u.\n", element->name, element->index); attribute_idx = signature_element->register_index; format = get_format_info(element->format, false);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/tpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 4d0658313..5c25f262b 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -5399,7 +5399,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju
case HLSL_IR_JUMP_DISCARD_NZ: { - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + instr.opcode = VKD3D_SM4_OP_DISCARD; + instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ;
memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); instr.src_count = 1;
From: Francisco Casas fcasas@codeweavers.com
We are directly translating HLSL_IR_JUMP_DISCARD_NEG to texkill, but texkill only takes the first three components into account, so if only the 4th component is negative, we get different results. --- tests/hlsl/clip.shader_test | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/tests/hlsl/clip.shader_test b/tests/hlsl/clip.shader_test index 1ebc06871..68059f216 100644 --- a/tests/hlsl/clip.shader_test +++ b/tests/hlsl/clip.shader_test @@ -20,3 +20,6 @@ probe all rgba (9, 8, 7, 6) uniform 0 float4 9 0 7 6 todo(glsl) draw quad probe all rgba (9, 0, 7, 6) +uniform 0 float4 3 3 3 -1 +todo(glsl) draw quad +todo(sm<4) probe all rgba (9, 0, 7, 6)
From: Francisco Casas fcasas@codeweavers.com
Instead of mapping HLSL_IR_JUMP_DISCARD_NEG directly to texkill, we make use of the HLSL_IR_JUMP_DISCARD_NEG -> HLSL_IR_JUMP_DISCARD_NZ pass, which has the benefit of reducing the condition to a single bool, and then lower HLSL_IR_JUMP_DISCARD_NZ -> HLSL_IR_JUMP_TEXKILL for SM1. --- libs/vkd3d-shader/d3dbc.c | 2 +- libs/vkd3d-shader/hlsl.c | 5 ++++ libs/vkd3d-shader/hlsl.h | 5 ++++ libs/vkd3d-shader/hlsl_codegen.c | 45 +++++++++++++++++++++++++++++--- tests/hlsl/clip.shader_test | 2 +- 5 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 099729fbb..09e020426 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -2369,7 +2369,7 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
switch (jump->type) { - case HLSL_IR_JUMP_DISCARD_NEG: + case HLSL_IR_JUMP_TEXKILL: { struct hlsl_reg *reg = &jump->condition.node->reg;
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index cba954c98..2ff5c4215 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2382,6 +2382,7 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", + [HLSL_IR_JUMP_TEXKILL] = "HLSL_IR_JUMP_TEXKILL", };
assert(type < ARRAY_SIZE(names)); @@ -2684,6 +2685,10 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i vkd3d_string_buffer_printf(buffer, "return"); break;
+ case HLSL_IR_JUMP_TEXKILL: + vkd3d_string_buffer_printf(buffer, "texkill"); + break; + case HLSL_IR_JUMP_UNRESOLVED_CONTINUE: vkd3d_string_buffer_printf(buffer, "unresolved_continue"); break; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 64111f3fc..add94384c 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -625,8 +625,13 @@ enum hlsl_ir_jump_type HLSL_IR_JUMP_BREAK, HLSL_IR_JUMP_CONTINUE, HLSL_IR_JUMP_DISCARD_NEG, + /* DISCARD_NZ cancels rendering of the current pixel if the condition, which in HLSL IR is + expected be a bool scalar, is true. */ HLSL_IR_JUMP_DISCARD_NZ, HLSL_IR_JUMP_RETURN, + /* JUMP_TEXKILL cancels rendering of the current pixel if the condition, which in HLSL IR is + expected to be a float scalar, is negative. */ + HLSL_IR_JUMP_TEXKILL, /* UNRESOLVED_CONTINUE type is used by the parser when 'continue' statement is found, it never reaches code generation, and is resolved to CONTINUE type once iteration and loop exit logic was properly applied. */ diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index e6490265d..590d15d70 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3725,6 +3725,44 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return true; }
+static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_node *cast, *neg, *condition; + struct hlsl_type *float_type; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + if (jump->type != HLSL_IR_JUMP_DISCARD_NZ) + return false; + condition = jump->condition.node; + + hlsl_block_init(&block); + + assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); + float_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT); + + if (!(cast = hlsl_new_cast(ctx, condition, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(&block, cast); + + memset(operands, 0, sizeof(operands)); + operands[0] = cast; + if (!(neg = hlsl_new_expr(ctx, HLSL_OP1_NEG, operands, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(&block, neg); + + list_move_tail(&instr->entry, &block.instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, neg); + jump->type = HLSL_IR_JUMP_TEXKILL; + + return true; +} + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { switch (instr->type) @@ -5411,10 +5449,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point "%s" is missing a [numthreads] attribute.", entry_func->func->name);
- if (profile->major_version >= 4) - { - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + if (profile->major_version < 4) + hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); lower_ir(ctx, lower_broadcasts, body); while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do diff --git a/tests/hlsl/clip.shader_test b/tests/hlsl/clip.shader_test index 68059f216..33adbda12 100644 --- a/tests/hlsl/clip.shader_test +++ b/tests/hlsl/clip.shader_test @@ -22,4 +22,4 @@ todo(glsl) draw quad probe all rgba (9, 0, 7, 6) uniform 0 float4 3 3 3 -1 todo(glsl) draw quad -todo(sm<4) probe all rgba (9, 0, 7, 6) +probe all rgba (9, 0, 7, 6)
From: Francisco Casas fcasas@codeweavers.com
Note that BIT_OR is not available for SM1 bools, so we must prefer LOGIC_OR when possible. --- libs/vkd3d-shader/hlsl.y | 68 ++++++++++++++------------------------ tests/hlsl/any.shader_test | 20 +++++------ 2 files changed, 34 insertions(+), 54 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 52c217654..35b55fcf8 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2721,6 +2721,14 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, return write_acos_or_asin(ctx, params, loc, false); }
+/* Find the type corresponding to the given source type, with the same + * dimensions but a different base type. */ +static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) +{ + return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +} + static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2750,52 +2758,33 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); }
-static bool intrinsic_any(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; + struct hlsl_ir_node *arg = params->args[0], *or, *load, *cast; + struct hlsl_type *bool_type; unsigned int i, count;
- if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) - { - hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); - return false; - } + count = hlsl_type_component_count(arg->data_type); + bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL);
- if (arg->data_type->base_type == HLSL_TYPE_FLOAT) - { - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); + if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) + return false;
- if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; + if (!(or = hlsl_add_load_component(ctx, params->instrs, cast, 0, loc))) + return false;
- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); - } - else if (arg->data_type->base_type == HLSL_TYPE_BOOL) + for (i = 1; i < count; ++i) { - if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, cast, i, loc))) return false; - hlsl_block_add_instr(params->instrs, bfalse);
- or = bfalse; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { - if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - - if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) - return false; - } - - return true; + if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) + return NULL; + hlsl_block_add_instr(params->instrs, or); }
- hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); - return false; + return true; }
static bool intrinsic_asin(struct hlsl_ctx *ctx, @@ -2896,15 +2885,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, return write_atan_or_atan2(ctx, params, loc, true); }
- -/* Find the type corresponding to the given source type, with the same - * dimensions but a different base type. */ -static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) -{ - return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); -} - static bool intrinsic_asfloat(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { diff --git a/tests/hlsl/any.shader_test b/tests/hlsl/any.shader_test index b143dd414..8a7408286 100644 --- a/tests/hlsl/any.shader_test +++ b/tests/hlsl/any.shader_test @@ -49,7 +49,7 @@ todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform uint4 b;
float4 main() : sv_target @@ -60,30 +60,30 @@ float4 main() : sv_target [test] if(sm<4) uniform 0 float4 1 1 1 1 if(sm>=4) uniform 0 uint4 1 1 1 1 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) if(sm<4) uniform 0 float4 1 0 0 0 if(sm>=4) uniform 0 uint4 1 0 0 0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) if(sm<4) uniform 0 float4 0 1 0 0 if(sm>=4) uniform 0 uint4 0 1 0 0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) if(sm<4) uniform 0 float4 0 0 1 0 if(sm>=4) uniform 0 uint4 0 0 1 0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) if(sm<4) uniform 0 float4 0 0 0 1 if(sm>=4) uniform 0 uint4 0 0 0 1 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) if(sm<4) uniform 0 float4 0 0 0 0 if(sm>=4) uniform 0 uint4 0 0 0 0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform uint b;
float4 main() : sv_target @@ -94,9 +94,9 @@ float4 main() : sv_target [test] if(sm<4) uniform 0 float4 1 0 0 0 if(sm>=4) uniform 0 uint4 1 0 0 0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) if(sm<4) uniform 0 float4 0 0 0 0 if(sm>=4) uniform 0 uint4 0 0 0 0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 35b55fcf8..fab585f96 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2732,30 +2732,30 @@ static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; + struct hlsl_ir_node *arg = params->args[0], *and, *load, *cast; + struct hlsl_type *bool_type; unsigned int i, count;
- if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) - return false; - hlsl_block_add_instr(params->instrs, one); + count = hlsl_type_component_count(arg->data_type); + bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL);
- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) return false; - hlsl_block_add_instr(params->instrs, zero);
- mul = one; + if (!(and = hlsl_add_load_component(ctx, params->instrs, cast, 0, loc))) + return false;
- count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) + for (i = 1; i < count; ++i) { - if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, cast, i, loc))) return false;
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) - return false; + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_AND, and, load))) + return NULL; + hlsl_block_add_instr(params->instrs, and); }
- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); + return true; }
static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params,
1/7 looks wrong. I haven't checked, but won't it do the wrong thing if we have something like
const int x = 0x80000000; return x ? 1 : 0;
HLSL_IR_JUMP_DISCARD_NEG, + /* DISCARD_NZ cancels rendering of the current pixel if the condition, which in HLSL IR is + expected be a bool scalar, is true. */ HLSL_IR_JUMP_DISCARD_NZ, HLSL_IR_JUMP_RETURN, + /* JUMP_TEXKILL cancels rendering of the current pixel if the condition, which in HLSL IR is + expected to be a float scalar, is negative. */ + HLSL_IR_JUMP_TEXKILL,
How is TEXKILL different from DISCARD_NEG, then? That needs to be made clear.
On Thu Mar 28 21:37:16 2024 +0000, Zebediah Figura wrote:
1/7 looks wrong. I haven't checked, but won't it do the wrong thing if we have something like const int x = 0x80000000; return x ? 1 : 0;
Currently, every time we generate an HLSL IR ternary operation, the condition (arg[0]) is boolean, so in SM1 it should only take one of two values at this point: 1.0f or 0.0f.
On Thu Mar 28 21:37:16 2024 +0000, Zebediah Figura wrote:
HLSL_IR_JUMP_DISCARD_NEG, + /* DISCARD_NZ cancels rendering of the current pixel if the condition, which in HLSL IR is + expected be a bool scalar, is true. */ HLSL_IR_JUMP_DISCARD_NZ, HLSL_IR_JUMP_RETURN, + /* JUMP_TEXKILL cancels rendering of the current pixel if the condition, which in HLSL IR is + expected to be a float scalar, is negative. */ + HLSL_IR_JUMP_TEXKILL,
How is TEXKILL different from DISCARD_NEG, then? That needs to be made clear.
I see, the difference is that in `HLSL_IR_JUMP_DISCARD_NEG` the condition doesn't need to be a scalar; it can be a vector or a matrix, and the rendering is aborted if any component is negative. I will add a comment.
Our current path for SM4 is:
DISCARD_NEG -> DISCARD_NZ
My patch proposes also doing that for SM1, and going this route to get TEXKILLs in SM1:
DISCARD_NEG -> DISCARD_NZ -> TEXKILL
Actually, the only place we are generating `HLSL_IR_JUMP_DISCARD_NEG` is in `intrinsic_clip()`. I would go further and rename it to `HLSL_IR_JUMP_CLIP`, what do you think of this?
On Thu Mar 28 21:37:16 2024 +0000, Francisco Casas wrote:
Currently, every time we generate a HLSL IR ternary operation, the condition (arg[0]) is boolean so in SM1 it should only acquire two values at this point, 1.0f or 0.0f.
Hrm? I don't see us always casting. Although, arguably, we should.
But if we did always cast to bool, why would we want to reinterpret to float here?
On Thu Mar 28 21:37:16 2024 +0000, Francisco Casas wrote:
I see, the difference is that in `HLSL_IR_JUMP_DISCARD_NEG` the condition doesn't need to be an scalar, it can be a vector or a matrix. And the rendering is aborted if any component is negative. I will add a comment. Our current path for SM4 is: DISCARD_NEG -> DISCARD_NZ My patch proposes also doing that for SM1, and going this route to get TEXKILLs in SM1: DISCARD_NEG -> DISCARD_NZ -> TEXKILL Actually, the only place we are generating `HLSL_IR_JUMP_DISCARD_NEG` is in `intrinsic_clip()`. I would go further and rename it to `HLSL_IR_JUMP_CLIP`, what do you think of this?
Looking back, the whole point of `HLSL_IR_JUMP_DISCARD_NEG` was for sm1. If we're not going to use it anymore, then we should just get rid of it, and generate DISCARD_NZ directly from hlsl.y.
That said, if we lower clip(var.xyz) down to a scalar condition, converting that *back* to texkill seems tricky.
On Thu Mar 28 22:03:42 2024 +0000, Zebediah Figura wrote:
Looking back, the whole point of `HLSL_IR_JUMP_DISCARD_NEG` was for sm1. If we're not going to use it anymore, then we should just get rid of it, and generate DISCARD_NZ directly from hlsl.y. That said, if we lower clip(var.xyz) down to a scalar condition, converting that *back* to texkill seems tricky.
Actually, more importantly, texkill *does* operate on all four components for 2.0, and for 1.x it has very restricted usage anyway (you can't use it on arbitrary expressions). Not sure why the test is failing in that case...
Hrm? I don't see us always casting. Although, arguably, we should.
Searching all the uses of `HLSL_OP3_TERNARY` in the codebase, I see that the condition is either a cast to bool or a comparison operation, which gives a bool result. Am I missing a case?
But if we did always cast to bool, why would we want to reinterpret to float here?
On SM1, casts to bool are rather complicated and are handled by the earlier `lower_casts_to_bool()` pass. After this pass, we know that all bool values are represented as 0.0f or 1.0f at runtime, so every time we need to cast a float to bool it should be a reinterpret instead.
Hrm? I don't see us always casting. Although, arguably, we should.
Searching all the uses of `HLSL_OP3_TERNARY` in the codebase, I see that the condition is either a cast to bool or a comparison operation, which gives a bool result. I am missing a case?
add_ternary() casts the condition to bool *if* it's scalar, but not otherwise. That's kind of incidental, since the main point there was to match the shape.
But if we did always cast to bool, why would we want to reinterpret to float here?
On SM1 casts to bool are rather complicated and handled by the earlier `lower_casts_to_bool()` pass. After this pass, we know that all bool values are represented as 0.0f or 1.0f at runtime, so every time we need to cast a float to bool it should be reinterpret instead.
Sure, but if HLSL_OP3_TERNARY is allowed to take a float, then why do we need to do anything at the HLSL level? We can just translate OP3_TERNARY directly when writing sm1.
On Thu Mar 28 22:04:44 2024 +0000, Zebediah Figura wrote:
Actually, more importantly, texkill *does* operate on all four components for 2.0, and for 1.x it has very restricted usage anyway (you can't use it on arbitrary expressions). Not sure why the test is failing in that case...
Are you sure this difference exists? I tried running that test on native compiling to both 2.0 and 3.0, and they seem to work identically. Though I only used the WARP driver, which might miss some of these finer details.
On Fri Mar 29 11:25:20 2024 +0000, Giovanni Mascellani wrote:
Are you sure this difference exists? I tried running that test on native compiling to both 2.0 and 3.0, and they seem to work identically. Though I only used the WARP driver, which might miss some of these finer details.
I meant "2.0 and up", sorry. It's only 1.x where it has different behaviour.
add_ternary() casts the condition to bool _if_ it's scalar, but not otherwise. That's kind of incidental, since the main point there was to match the shape.
Oh, you are right. Sorry, I should have paid more attention. Please ignore my previous argument.
Going back to your example:
1/7 looks wrong. I haven't checked, but won't it do the wrong thing if we have something like
const int x = 0x80000000; return x ? 1 : 0;
So, integers and bools are represented internally as floats in SM1, so a reinterpret has no real effect (it doesn't emit any instruction) besides avoiding the "SM1 non-float expression" fixme in d3dbc.
I think that emitting this fixme is correct for ABS and NEG operations on bool types, and thus, the reinterpret should be explicit in HLSL IR.