Mainly the implementation of SM1 comparison operators, defined in terms of CMP and SLT instructions, and the capacity to transform SLT->CMP for pixel shaders and CMP->SLT for vertex shaders.
-- v2: vkd3d-shader/hlsl: Lower CMP instructions for vertex shaders. vkd3d-shader/hlsl: Use hlsl_fixme() on missing SM1 matrix writemask lowering. vkd3d-shader/hlsl: Lower SLT instructions for pixel shaders. tests: Test equality between tiny and between large numbers on ps_2_0. vkd3d-shader/hlsl: Implement SM1 comparison operators.
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/d3dbc.c | 4 ++++ libs/vkd3d-shader/hlsl.h | 1 + 2 files changed, 5 insertions(+)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 57dd0258a..3a815dc57 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -2334,10 +2334,14 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b break;
case HLSL_OP2_SLT: + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); break;
case HLSL_OP3_CMP: + if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break;
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index da4bb1e78..918a01a5a 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -593,6 +593,7 @@ enum hlsl_ir_expr_op HLSL_OP2_MUL, HLSL_OP2_NEQUAL, HLSL_OP2_RSHIFT, + /* SLT(a, b) retrieves 1.0 if (a < b), else 0.0. Only used for SM1-SM3 target vertex shaders. */ HLSL_OP2_SLT,
/* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy,
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 114 +++++++++++++++++++++++ tests/hlsl/vertex-shader-ops.shader_test | 10 +- 2 files changed, 119 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9eb65dc01..bba90461c 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3020,6 +3020,119 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return true; }
+static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_block *block) +{ + struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + bool negate = false; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS + && expr->op != HLSL_OP2_GEQUAL) + return false; + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg1_cast); + + if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg2_cast); + + switch (expr->op) + { + case HLSL_OP2_EQUAL: + case HLSL_OP2_NEQUAL: + { + struct hlsl_ir_node *neg, *sub, *mul, *zero; + struct hlsl_constant_value zero_value; + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) + return false; + hlsl_block_add_instr(block, sub); + + /* Use MUL as a precarious ABS. 
*/ + if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) + return false; + hlsl_block_add_instr(block, mul); + + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, zero, mul))) + return false; + hlsl_block_add_instr(block, slt); + + negate = (expr->op == HLSL_OP2_EQUAL); + break; + } + case HLSL_OP2_GEQUAL: + case HLSL_OP2_LESS: + { + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast))) + return false; + hlsl_block_add_instr(block, slt); + + negate = (expr->op == HLSL_OP2_GEQUAL); + break; + } + + default: + vkd3d_unreachable(); + } + + if (negate) + { + struct hlsl_constant_value one_value; + struct hlsl_ir_node *one, *slt_neg; + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(slt_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) + return false; + hlsl_block_add_instr(block, slt_neg); + + if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, slt_neg))) + return false; + hlsl_block_add_instr(block, res); + } + else + { + res = slt; + } + + /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT, + * and casts to BOOL have already been lowered to "!= 0". 
*/ + memset(operands, 0, sizeof(operands)); + operands[0] = res; + if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, ret); + + return true; +} + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_type *type = instr->data_type, *arg_type; @@ -5209,6 +5322,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_round, body); lower_ir(ctx, lower_ceil, body); lower_ir(ctx, lower_floor, body); + lower_ir(ctx, lower_comparison_operators, body); }
if (profile->major_version < 2) diff --git a/tests/hlsl/vertex-shader-ops.shader_test b/tests/hlsl/vertex-shader-ops.shader_test index 34d496727..38f3db658 100644 --- a/tests/hlsl/vertex-shader-ops.shader_test +++ b/tests/hlsl/vertex-shader-ops.shader_test @@ -15,7 +15,7 @@ float4 main(in float4 res : COLOR1) : sv_target
% Check that -0.0f is not less than 0.0f -[vertex shader todo(sm<4)] +[vertex shader] float a;
void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos : sv_position) @@ -28,11 +28,11 @@ void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos [test] if(sm<4) uniform 0 float 0.0 if(sm>=4) uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 0.0, 0.0)
-[vertex shader todo(sm<4)] +[vertex shader] int a, b;
void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos : sv_position) @@ -49,12 +49,12 @@ void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos if(sm<4) uniform 0 float 3 if(sm<4) uniform 4 float 4 if(sm>=4) uniform 0 int4 3 4 0 0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 1.0, 0.0, 1.0) if(sm<4) uniform 0 float -2 if(sm<4) uniform 4 float -2 if(sm>=4) uniform 0 int4 -2 -2 0 0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 0.0, 0.0, 1.0)
From: Francisco Casas fcasas@codeweavers.com
--- tests/hlsl/float-comparison.shader_test | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+)
diff --git a/tests/hlsl/float-comparison.shader_test b/tests/hlsl/float-comparison.shader_test index c351ac098..9334df2bc 100644 --- a/tests/hlsl/float-comparison.shader_test +++ b/tests/hlsl/float-comparison.shader_test @@ -64,6 +64,53 @@ if(sm>=4 & sm<6) todo probe all rgba (1010101.0, 0.0, 1101001.0, 1.0) if(sm>=6) probe all rgba (1010101.0, 11110000.0, 1101001.0, 1.0)
+% In shader model 2.0, native compares two numbers for equality checking if (a - b)*(a - b) is +% positive instead of |a - b|. We check if this causes some changes in behavior for very small and +% very large numbers. +% For large numbers the behavior is the same, even though the multiplication reaches inf, but for +% very small ones it is not because the multiplication results in 0. +% +% NOTE: Seems that subnormal numbers are considered equal to zero, at least in the WARP driver. +% Probably this is implementation dependent and deserves separate testing, so only normal numbers +% are passed on these tests. +[require] +shader model >= 2.0 +shader model < 3.0 + +[pixel shader todo(sm<4)] +float4 a, b; + +float4 main() : sv_target +{ + return a == b; +} + +[test] +uniform 0 float4 1e-37 1e-37 1e+38 1e+38 +uniform 4 float4 0 -1e-37 1e+38 -1e+38 +todo(sm<4) draw quad +probe all rgba (1.0, 1.0, 1.0, 0.0) + + +[require] +shader model >= 3.0 +shader model < 4.0 + +[pixel shader todo(sm<4)] +float4 a, b; + +float4 main() : sv_target +{ + return a == b; +} + +[test] +uniform 0 float4 1e-37 1e-37 1e+38 1e+38 +uniform 4 float4 0 -1e-37 1e+38 -1e+38 +todo(sm<4) draw quad +probe all rgba (0.0, 0.0, 1.0, 0.0) + + [require] shader model >= 6.0
@@ -94,3 +141,4 @@ float4 main() : sv_target uniform 0 float4 1.5 0.0 1.0 -1.0 draw quad probe all rgba (0.0, 0.0, 1.0, 0.0) +
From: Francisco Casas fcasas@codeweavers.com
Properly passing the inverse-trig.shader_test tests whose qualifiers have been removed requires making spirv.c capable of handling ABS. The same happens for the ps_3_0 equality test in float-comparison.shader_test. --- libs/vkd3d-shader/hlsl.c | 10 +++ libs/vkd3d-shader/hlsl.h | 2 + libs/vkd3d-shader/hlsl_codegen.c | 90 ++++++++++++++++--- tests/hlsl/any.shader_test | 24 ++--- tests/hlsl/expr-indexing.shader_test | 14 +-- tests/hlsl/float-comparison.shader_test | 10 +-- tests/hlsl/inverse-trig.shader_test | 4 +- tests/hlsl/matrix-indexing.shader_test | 6 +- tests/hlsl/non-const-indexing.shader_test | 10 +-- tests/hlsl/step.shader_test | 4 +- .../hlsl/vector-indexing-uniform.shader_test | 4 +- 11 files changed, 130 insertions(+), 48 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index c315000a6..9fa6423f7 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1348,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); }
+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; + + assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); + return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); +} + struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 918a01a5a..e9beead5a 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1246,6 +1246,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index bba90461c..7349ab059 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3053,8 +3053,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node case HLSL_OP2_EQUAL: case HLSL_OP2_NEQUAL: { - struct hlsl_ir_node *neg, *sub, *mul, *zero; - struct hlsl_constant_value zero_value; + struct hlsl_ir_node *neg, *sub, *abs, *abs_neg;
if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) return false; @@ -3064,17 +3063,25 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node return false; hlsl_block_add_instr(block, sub);
- /* Use MUL as a precarious ABS. */ - if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) - return false; - hlsl_block_add_instr(block, mul); + if (ctx->profile->major_version >= 3) + { + if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc))) + return false; + hlsl_block_add_instr(block, abs); + } + else + { + /* Use MUL as a precarious ABS. */ + if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) + return false; + hlsl_block_add_instr(block, abs); + }
- memset(&zero_value, 0, sizeof(zero_value)); - if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + if (!(abs_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) return false; - hlsl_block_add_instr(block, zero); + hlsl_block_add_instr(block, abs_neg);
- if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, zero, mul))) + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, abs_neg, abs))) return false; hlsl_block_add_instr(block, slt);
@@ -3133,6 +3140,67 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node return true; }
+/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to + * CMP instructions (only available in pixel shaders). + * Based on the following equivalence: + * SLT(x, y) + * = (x < y) ? 1.0 : 0.0 + * = ((x - y) >= 0) ? 0.0 : 1.0 + * = CMP(x - y, 0.0, 1.0) + */ +static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; + struct hlsl_constant_value zero_value, one_value; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP2_SLT) + return false; + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg1_cast); + + if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg2_cast); + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) + return false; + hlsl_block_add_instr(block, sub); + + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) + return false; + hlsl_block_add_instr(block, cmp); + + return true; +} + static bool 
lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_type *type = instr->data_type, *arg_type; @@ -5323,6 +5391,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_ceil, body); lower_ir(ctx, lower_floor, body); lower_ir(ctx, lower_comparison_operators, body); + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + lower_ir(ctx, lower_slt, body); }
if (profile->major_version < 2) diff --git a/tests/hlsl/any.shader_test b/tests/hlsl/any.shader_test index 83a1dad97..45df3bdc4 100644 --- a/tests/hlsl/any.shader_test +++ b/tests/hlsl/any.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo(sm<4)] +[pixel shader] uniform float4 f;
float4 main() : sv_target @@ -8,28 +8,28 @@ float4 main() : sv_target
[test] uniform 0 float4 1.0 1.0 1.0 1.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 1.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 0.0 1.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 0.0 0.0 1.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) uniform 0 float4 -1.0 -1.0 -1.0 -1.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform float f;
float4 main() : sv_target @@ -39,13 +39,13 @@ float4 main() : sv_target
[test] uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) uniform 0 float4 -1.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0)
diff --git a/tests/hlsl/expr-indexing.shader_test b/tests/hlsl/expr-indexing.shader_test index 2aa99b40d..3dcc5727e 100644 --- a/tests/hlsl/expr-indexing.shader_test +++ b/tests/hlsl/expr-indexing.shader_test @@ -13,7 +13,7 @@ draw quad probe all rgba (8.0, 8.0, 8.0, 8.0)
-[pixel shader todo(sm<4)] +[pixel shader] float4 a, b; float i;
@@ -26,7 +26,7 @@ float4 main() : sv_target uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float4 5.0 6.0 7.0 8.0 uniform 8 float 2 -todo(sm<4) draw quad +draw quad probe all rgba (10.0, 10.0, 10.0, 10.0)
@@ -44,7 +44,7 @@ draw quad probe all rgba (3.0, 3.0, 3.0, 3.0)
-[pixel shader todo(sm<4)] +[pixel shader] float4 a; float i;
@@ -56,10 +56,10 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 0 -todo(sm<4) draw quad +draw quad probe all rgba (4.0, 4.0, 4.0, 4.0) uniform 4 float 2 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0)
@@ -82,7 +82,7 @@ draw quad probe all rgba (4.0, 4.0, 4.0, 4.0)
-[pixel shader todo(sm<4)] +[pixel shader] float4 a; float i;
@@ -99,5 +99,5 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 1 -todo(sm<4) draw quad +draw quad probe all rgba (2.0, 2.0, 2.0, 2.0) diff --git a/tests/hlsl/float-comparison.shader_test b/tests/hlsl/float-comparison.shader_test index 9334df2bc..09d36abef 100644 --- a/tests/hlsl/float-comparison.shader_test +++ b/tests/hlsl/float-comparison.shader_test @@ -1,5 +1,5 @@ % Check that -0.0f is not less than 0.0f -[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -9,7 +9,7 @@ float4 main() : sv_target
[test] uniform 0 float 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 0.0, 0.0)
@@ -77,7 +77,7 @@ if(sm>=6) probe all rgba (1010101.0, 11110000.0, 1101001.0, 1.0) shader model >= 2.0 shader model < 3.0
-[pixel shader todo(sm<4)] +[pixel shader] float4 a, b;
float4 main() : sv_target @@ -88,7 +88,7 @@ float4 main() : sv_target [test] uniform 0 float4 1e-37 1e-37 1e+38 1e+38 uniform 4 float4 0 -1e-37 1e+38 -1e+38 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 0.0)
@@ -96,7 +96,7 @@ probe all rgba (1.0, 1.0, 1.0, 0.0) shader model >= 3.0 shader model < 4.0
-[pixel shader todo(sm<4)] +[pixel shader] float4 a, b;
float4 main() : sv_target diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test index e428d3090..27a5025c2 100644 --- a/tests/hlsl/inverse-trig.shader_test +++ b/tests/hlsl/inverse-trig.shader_test @@ -3,7 +3,7 @@ % implementations. DXIL defines intrinsics for inverse trig, to be implemented % by the backend.
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 a;
float4 main() : sv_target @@ -32,7 +32,7 @@ uniform 0 float4 1.0 0.0 0.0 0.0 todo(sm<4) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) 128
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 a;
float4 main() : sv_target diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test index 6e2f01b7a..f17267631 100644 --- a/tests/hlsl/matrix-indexing.shader_test +++ b/tests/hlsl/matrix-indexing.shader_test @@ -108,7 +108,7 @@ draw quad probe all rgba (3.0, 4.0, 50.0, 60.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform float i;
float4 main() : sv_target @@ -120,8 +120,8 @@ float4 main() : sv_target
[test] uniform 0 float 2 -todo(sm<4) draw quad -probe all rgba (8, 9, 10, 11) +draw quad +todo(sm<4) probe all rgba (8, 9, 10, 11)
[pixel shader todo(sm<4)] diff --git a/tests/hlsl/non-const-indexing.shader_test b/tests/hlsl/non-const-indexing.shader_test index aed33092c..d11463349 100644 --- a/tests/hlsl/non-const-indexing.shader_test +++ b/tests/hlsl/non-const-indexing.shader_test @@ -25,7 +25,7 @@ todo(sm<4) draw quad probe all rgba (9.0, 10.0, 11.0, 12.0)
-[pixel shader todo(sm<4)] +[pixel shader] uniform float i;
float4 main() : SV_TARGET @@ -36,16 +36,16 @@ float4 main() : SV_TARGET
[test] uniform 0 float 0 -todo(sm<4) draw quad +draw quad probe all rgba (11.0, 11.0, 11.0, 11.0) uniform 0 float 1 -todo(sm<4) draw quad +draw quad probe all rgba (12.0, 12.0, 12.0, 12.0) uniform 0 float 2 -todo(sm<4) draw quad +draw quad probe all rgba (13.0, 13.0, 13.0, 13.0) uniform 0 float 3 -todo(sm<4) draw quad +draw quad probe all rgba (14.0, 14.0, 14.0, 14.0)
diff --git a/tests/hlsl/step.shader_test b/tests/hlsl/step.shader_test index b965f33e1..e201e15f9 100644 --- a/tests/hlsl/step.shader_test +++ b/tests/hlsl/step.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo(sm<4)] +[pixel shader] uniform float4 f, p;
float4 main() : sv_target @@ -9,7 +9,7 @@ float4 main() : sv_target [test] uniform 0 float4 5.0 -2.6 3.0 2.0 uniform 4 float4 1.0 -4.3 3.0 4.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 1.0, 1.0)
diff --git a/tests/hlsl/vector-indexing-uniform.shader_test b/tests/hlsl/vector-indexing-uniform.shader_test index cd77462ec..e5ffbdd02 100644 --- a/tests/hlsl/vector-indexing-uniform.shader_test +++ b/tests/hlsl/vector-indexing-uniform.shader_test @@ -1,6 +1,6 @@ % Use a uniform to prevent the compiler from optimizing.
-[pixel shader todo(sm<4)] +[pixel shader] uniform float i; float4 main() : SV_TARGET { @@ -12,5 +12,5 @@ float4 main() : SV_TARGET
[test] uniform 0 float 2 -todo(sm<4) draw quad +draw quad probe all rgba (0.5, 0.3, 0.8, 0.2)
From: Francisco Casas fcasas@codeweavers.com
Instead of FIXME(). Otherwise we compile invalid d3dbc. --- libs/vkd3d-shader/d3dbc.c | 2 +- tests/hlsl/matrix-indexing.shader_test | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 3a815dc57..aca0f9931 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -2500,7 +2500,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *
if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) { - FIXME("Matrix writemasks need to be lowered.\n"); + hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks.\n"); return; }
diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test index f17267631..6e2f01b7a 100644 --- a/tests/hlsl/matrix-indexing.shader_test +++ b/tests/hlsl/matrix-indexing.shader_test @@ -108,7 +108,7 @@ draw quad probe all rgba (3.0, 4.0, 50.0, 60.0)
-[pixel shader] +[pixel shader todo(sm<4)] uniform float i;
float4 main() : sv_target @@ -120,8 +120,8 @@ float4 main() : sv_target
[test] uniform 0 float 2 -draw quad -todo(sm<4) probe all rgba (8, 9, 10, 11) +todo(sm<4) draw quad +probe all rgba (8, 9, 10, 11)
[pixel shader todo(sm<4)]
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 124 +++++++++++++++++++------------ 1 file changed, 77 insertions(+), 47 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 7349ab059..fd02ff4e8 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2902,7 +2902,7 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return true; }
-/* Use movc/cmp/slt for the ternary operator. */ +/* Use movc/cmp for the ternary operator. */ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; @@ -2928,7 +2928,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return false; }
- if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (ctx->profile->major_version < 4) { struct hlsl_ir_node *abs, *neg;
@@ -2946,51 +2946,6 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) return false; } - else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) - { - struct hlsl_ir_node *neg, *slt, *sum, *cond2, *slt_cast, *mul; - - /* Expression used here is "slt(<cond>) * (first - second) + second". */ - - if (ctx->profile->major_version == 3) - { - if (!(cond2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) - return false; - } - else - { - if (!(cond2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, cond, cond))) - return false; - } - hlsl_block_add_instr(block, cond2); - - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cond2, &instr->loc))) - return false; - hlsl_block_add_instr(block, neg); - - if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg, cond2))) - return false; - hlsl_block_add_instr(block, slt); - - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, second, &instr->loc))) - return false; - hlsl_block_add_instr(block, neg); - - if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, first, neg))) - return false; - hlsl_block_add_instr(block, sum); - - if (!(slt_cast = hlsl_new_cast(ctx, slt, sum->data_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, slt_cast); - - if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, slt_cast, sum))) - return false; - hlsl_block_add_instr(block, mul); - - if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul, second))) - return false; - } else { if (cond->data_type->base_type == HLSL_TYPE_FLOAT) @@ -3201,6 +3156,79 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h return true; }
+/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to + * SLT instructions (only available in vertex shaders). + * Based on the following equivalence: + * CMP(x, y, z) + * = (x >= 0) ? y : z + * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0) + * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)) + */ +static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add; + struct hlsl_constant_value zero_value, one_value; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + unsigned int i; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP3_CMP) + return false; + + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + for (i = 0; i < 3; ++i) + { + args[i] = expr->operands[i].node; + + if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, args_cast[i]); + } + + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, args_cast[0], zero))) + return false; + hlsl_block_add_instr(block, slt); + + if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[2], slt))) + return false; + hlsl_block_add_instr(block, mul1); + + if (!(neg_slt = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg_slt); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg_slt))) + return 
false; + hlsl_block_add_instr(block, sub); + + if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[1], sub))) + return false; + hlsl_block_add_instr(block, mul2); + + if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul1, mul2))) + return false; + hlsl_block_add_instr(block, add); + + return true; +} + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_type *type = instr->data_type, *arg_type; @@ -5393,6 +5421,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_comparison_operators, body); if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) lower_ir(ctx, lower_slt, body); + else + lower_ir(ctx, lower_cmp, body); }
if (profile->major_version < 2)
On Tue Mar 12 18:40:16 2024 +0000, Giovanni Mascellani wrote:
It doesn't look like this is used.
I see, I probably intended this for creating the CMP instruction. So I did that.
On Tue Mar 12 22:02:29 2024 +0000, Zebediah Figura wrote:
vs_2_0 uses "slt dst, -mul, mul" instead of "slt dst, 0, mul". Does this ever make a difference? Should we do it anyway just to avoid an otherwise unnecessary constant?
If I am not mistaken, it should not make a difference. But since we don't have neg as a modifier, only as an instruction, in HLSL IR, we have to add the NEG as an additional instruction.
I added the change though, in case there is some difference I am not considering.
On Tue Mar 12 21:59:04 2024 +0000, Francisco Casas wrote:
changed this line in [version 2 of the diff](/wine/vkd3d/-/merge_requests/706/diffs?diff_id=104718&start_sha=2a8592a57b97d5c36af233dec13d88f5b9abb1a9#3cf804f245af47d51595ff932bf817c50967eea2_3027_3021)
I checked. It doesn't make a difference for large numbers (whose multiplication results in inf), but it **does** make a difference for smaller numbers whose multiplication results in zero. So I added the condition on the `profile->major_version` paths.
I added pertinent tests.
On Tue Mar 12 22:03:28 2024 +0000, Zebediah Figura wrote:
Why does patch 5/5 not use hlsl_new_binary_expr() et al.?
Good question, I probably forgot about those functions. This also happened in 3/6 (ex 3/5). I am using the helpers now.
This merge request was approved by Zebediah Figura.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
return false;
hlsl_block_add_instr(block, mul);
memset(&zero_value, 0, sizeof(zero_value));
if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
return false;
hlsl_block_add_instr(block, zero);
if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, zero, mul)))
return false;
hlsl_block_add_instr(block, slt);
negate = (expr->op == HLSL_OP2_EQUAL);
break;
}
case HLSL_OP2_GEQUAL:
Nitpick: missing empty line.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub)))
return false;
hlsl_block_add_instr(block, mul);
if (ctx->profile->major_version >= 3)
{
if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc)))
return false;
hlsl_block_add_instr(block, abs);
}
else
{
/* Use MUL as a precarious ABS. */
if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub)))
return false;
hlsl_block_add_instr(block, abs);
}
Any reason I'm missing for not introducing this directly in 2/6?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, zero, mul)))
return false;
hlsl_block_add_instr(block, slt);
negate = (expr->op == HLSL_OP2_EQUAL);
break;
}
case HLSL_OP2_GEQUAL:
case HLSL_OP2_LESS:
{
if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast)))
return false;
hlsl_block_add_instr(block, slt);
negate = (expr->op == HLSL_OP2_GEQUAL);
I think that's fine too (reproducing the native code opcode by opcode has never been a target of our HLSL compiler), but it seems that SGE also exists and FXC is willing to emit it for `vs_3_0`: https://shader-playground.timjones.io/27b940d613fd9365f083b11bb18139bf, just in case at some point this turns out to be relevant (in theory `<` and `>=` are not exactly the opposite of each other, because of NaNs, but SM1-3 mostly pretend NaNs do not exist).
This merge request was approved by Giovanni Mascellani.