Mainly, this implements the SM1 comparison operators in terms of the CMP and SLT instructions, together with the ability to lower SLT->CMP for pixel shaders and CMP->SLT for vertex shaders (SLT is only available in vertex shaders, and CMP only in pixel shaders).
From: Francisco Casas <fcasas@codeweavers.com>

---
 libs/vkd3d-shader/d3dbc.c | 4 ++++
 libs/vkd3d-shader/hlsl.h  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c
index 57dd0258a..3a815dc57 100644
--- a/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d-shader/d3dbc.c
@@ -2334,10 +2334,14 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
             break;

         case HLSL_OP2_SLT:
+            if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+                hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders.");
             write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg);
             break;

         case HLSL_OP3_CMP:
+            if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
+                hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders.");
             write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);
             break;
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index da4bb1e78..918a01a5a 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -593,6 +593,7 @@ enum hlsl_ir_expr_op
     HLSL_OP2_MUL,
     HLSL_OP2_NEQUAL,
     HLSL_OP2_RSHIFT,
+    /* SLT(a, b) returns 1.0 if (a < b), and 0.0 otherwise. Only used when targeting SM1-SM3 vertex shaders. */
    HLSL_OP2_SLT,
/* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy,
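
As a reference for readers unfamiliar with the SM1 instructions involved (this is my aside, not part of the patch): SLT is a per-component "set if less than". A minimal C model of the semantics assumed throughout this series, with a helper name of my choosing:

/* slt dst, src0, src1  =>  per component: dst = (src0 < src1) ? 1.0f : 0.0f */
static float model_slt(float src0, float src1)
{
    return src0 < src1 ? 1.0f : 0.0f;
}

CMP, its pixel-shader counterpart, can be modeled the same way as (src0 >= 0.0f) ? src1 : src2, also per component.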
From: Francisco Casas <fcasas@codeweavers.com>

---
 libs/vkd3d-shader/hlsl_codegen.c         | 133 +++++++++++++++++++++++
 tests/hlsl/vertex-shader-ops.shader_test |  10 +-
 2 files changed, 138 insertions(+), 5 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 9eb65dc01..9c9b781ce 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -3020,6 +3020,138 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
     return true;
 }

+static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
+        struct hlsl_block *block)
+{
+    struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret;
+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
+    struct hlsl_type *float_type;
+    struct hlsl_ir_expr *expr;
+    bool negate = false;
+
+    if (instr->type != HLSL_IR_EXPR)
+        return false;
+    expr = hlsl_ir_expr(instr);
+    if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS
+            && expr->op != HLSL_OP2_GEQUAL)
+        return false;
+
+    arg1 = expr->operands[0].node;
+    arg2 = expr->operands[1].node;
+    float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+
+    if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, arg1_cast);
+
+    if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, arg2_cast);
+
+    switch (expr->op)
+    {
+        case HLSL_OP2_EQUAL:
+        case HLSL_OP2_NEQUAL:
+        {
+            struct hlsl_ir_node *neg, *sub, *mul, *zero;
+            struct hlsl_constant_value zero_value;
+
+            memset(operands, 0, sizeof(operands));
+            operands[0] = arg2_cast;
+            if (!(neg = hlsl_new_expr(ctx, HLSL_OP1_NEG, operands, float_type, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, neg);
+
+            memset(operands, 0, sizeof(operands));
+            operands[0] = arg1_cast;
+            operands[1] = neg;
+            if (!(sub = hlsl_new_expr(ctx, HLSL_OP2_ADD, operands, float_type, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, sub);
+
+            /* Use MUL as a precarious ABS. */
+            memset(operands, 0, sizeof(operands));
+            operands[0] = sub;
+            operands[1] = sub;
+            if (!(mul = hlsl_new_expr(ctx, HLSL_OP2_MUL, operands, float_type, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, mul);
+
+            memset(&zero_value, 0, sizeof(zero_value));
+            if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, zero);
+
+            memset(operands, 0, sizeof(operands));
+            operands[0] = zero;
+            operands[1] = mul;
+            if (!(slt = hlsl_new_expr(ctx, HLSL_OP2_SLT, operands, float_type, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, slt);
+
+            negate = (expr->op == HLSL_OP2_EQUAL);
+            break;
+        }
+        case HLSL_OP2_GEQUAL:
+        case HLSL_OP2_LESS:
+        {
+            memset(operands, 0, sizeof(operands));
+            operands[0] = arg1_cast;
+            operands[1] = arg2_cast;
+            if (!(slt = hlsl_new_expr(ctx, HLSL_OP2_SLT, operands, float_type, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, slt);
+
+            negate = (expr->op == HLSL_OP2_GEQUAL);
+            break;
+        }
+
+        default:
+            vkd3d_unreachable();
+    }
+
+    if (negate)
+    {
+        struct hlsl_constant_value one_value;
+        struct hlsl_ir_node *one, *slt_neg;
+
+        one_value.u[0].f = 1.0;
+        one_value.u[1].f = 1.0;
+        one_value.u[2].f = 1.0;
+        one_value.u[3].f = 1.0;
+        if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
+            return false;
+        hlsl_block_add_instr(block, one);
+
+        memset(operands, 0, sizeof(operands));
+        operands[0] = slt;
+        if (!(slt_neg = hlsl_new_expr(ctx, HLSL_OP1_NEG, operands, float_type, &instr->loc)))
+            return false;
+        hlsl_block_add_instr(block, slt_neg);
+
+        memset(operands, 0, sizeof(operands));
+        operands[0] = one;
+        operands[1] = slt_neg;
+        if (!(res = hlsl_new_expr(ctx, HLSL_OP2_ADD, operands, float_type, &instr->loc)))
+            return false;
+        hlsl_block_add_instr(block, res);
+    }
+    else
+    {
+        res = slt;
+    }
+
+    /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT,
+     * and casts to BOOL have already been lowered to "!= 0". */
+    memset(operands, 0, sizeof(operands));
+    operands[0] = res;
+    if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, ret);
+
+    return true;
+}
+
 static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 {
     struct hlsl_type *type = instr->data_type, *arg_type;
@@ -5209,6 +5341,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
         lower_ir(ctx, lower_round, body);
         lower_ir(ctx, lower_ceil, body);
         lower_ir(ctx, lower_floor, body);
+        lower_ir(ctx, lower_comparison_operators, body);
     }
     if (profile->major_version < 2)

diff --git a/tests/hlsl/vertex-shader-ops.shader_test b/tests/hlsl/vertex-shader-ops.shader_test
index 34d496727..38f3db658 100644
--- a/tests/hlsl/vertex-shader-ops.shader_test
+++ b/tests/hlsl/vertex-shader-ops.shader_test
@@ -15,7 +15,7 @@ float4 main(in float4 res : COLOR1) : sv_target

 % Check that -0.0f is not less than 0.0f
-[vertex shader todo(sm<4)]
+[vertex shader]
 float a;

 void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos : sv_position)
@@ -28,11 +28,11 @@ void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos
 [test]
 if(sm<4) uniform 0 float 0.0
 if(sm>=4) uniform 0 float4 0.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0)

-[vertex shader todo(sm<4)]
+[vertex shader]
 int a, b;

 void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos : sv_position)
@@ -49,12 +49,12 @@ void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos
 if(sm<4) uniform 0 float 3
 if(sm<4) uniform 4 float 4
 if(sm>=4) uniform 0 int4 3 4 0 0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 1.0, 0.0, 1.0)

 if(sm<4) uniform 0 float -2
 if(sm<4) uniform 4 float -2
 if(sm>=4) uniform 0 int4 -2 -2 0 0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 0.0, 0.0, 1.0)
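
The identities this pass relies on can be sanity-checked on the host. A small self-contained C sketch of the four lowerings follows; this is my own illustration (the helper names are not from the patch), and where the IR builds NEG/ADD pairs the sketch simply subtracts:

#include <assert.h>

/* Per-component model of the d3dbc SLT instruction. */
static float slt(float a, float b) { return a < b ? 1.0f : 0.0f; }

static float lower_less(float a, float b)   { return slt(a, b); }
static float lower_gequal(float a, float b) { return 1.0f - slt(a, b); }
/* MUL squares the difference, so it reaches 0.0 exactly when (a - b) == 0. */
static float lower_nequal(float a, float b) { return slt(0.0f, (a - b) * (a - b)); }
static float lower_equal(float a, float b)  { return 1.0f - slt(0.0f, (a - b) * (a - b)); }

int main(void)
{
    assert(lower_less(1.0f, 2.0f) == 1.0f && lower_less(2.0f, 1.0f) == 0.0f);
    assert(lower_gequal(2.0f, 1.0f) == 1.0f && lower_gequal(1.0f, 2.0f) == 0.0f);
    assert(lower_nequal(3.0f, 4.0f) == 1.0f && lower_nequal(3.0f, 3.0f) == 0.0f);
    assert(lower_equal(3.0f, 3.0f) == 1.0f);
    assert(lower_less(-0.0f, 0.0f) == 0.0f); /* -0.0f is not less than 0.0f, as tested above. */
    return 0;
}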
From: Francisco Casas <fcasas@codeweavers.com>

Properly passing the inverse-trig.shader_test tests whose qualifiers have been removed requires making spirv.c capable of handling ABS.

---
 libs/vkd3d-shader/hlsl.c                     | 10 +++
 libs/vkd3d-shader/hlsl.h                     |  2 +
 libs/vkd3d-shader/hlsl_codegen.c             | 73 +++++++++++++++++++
 tests/hlsl/any.shader_test                   | 24 +++---
 tests/hlsl/expr-indexing.shader_test         | 14 ++--
 tests/hlsl/float-comparison.shader_test      |  4 +-
 tests/hlsl/inverse-trig.shader_test          |  4 +-
 tests/hlsl/matrix-indexing.shader_test       |  6 +-
 tests/hlsl/non-const-indexing.shader_test    | 10 +--
 tests/hlsl/step.shader_test                  |  4 +-
 .../hlsl/vector-indexing-uniform.shader_test |  4 +-
 11 files changed, 120 insertions(+), 35 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
index c315000a6..9fa6423f7 100644
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -1348,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp
     return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
 }

+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
+        struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3)
+{
+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3};
+
+    assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type));
+    assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type));
+    return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
+}
+
 struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition,
         struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc)
 {
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index 918a01a5a..e9beead5a 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -1246,6 +1246,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond
 struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc);
 struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx,
         enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
+        struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 9c9b781ce..3d8909713 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -3152,6 +3152,77 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node
     return true;
 }

+/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to
+ * CMP instructions (only available in pixel shaders).
+ * Based on the following equivalence:
+ *     SLT(x, y)
+ *     = (x < y) ? 1.0 : 0.0
+ *     = ((x - y) >= 0) ? 0.0 : 1.0
+ *     = CMP(x - y, 0.0, 1.0)
+ */
+static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
+{
+    struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp;
+    struct hlsl_constant_value zero_value, one_value;
+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
+    struct hlsl_type *float_type;
+    struct hlsl_ir_expr *expr;
+
+    if (instr->type != HLSL_IR_EXPR)
+        return false;
+    expr = hlsl_ir_expr(instr);
+    if (expr->op != HLSL_OP2_SLT)
+        return false;
+
+    arg1 = expr->operands[0].node;
+    arg2 = expr->operands[1].node;
+    float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+
+    if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, arg1_cast);
+
+    if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, arg2_cast);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = arg2_cast;
+    if (!(neg = hlsl_new_expr(ctx, HLSL_OP1_NEG, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, neg);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = arg1_cast;
+    operands[1] = neg;
+    if (!(sub = hlsl_new_expr(ctx, HLSL_OP2_ADD, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, sub);
+
+    memset(&zero_value, 0, sizeof(zero_value));
+    if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, zero);
+
+    one_value.u[0].f = 1.0;
+    one_value.u[1].f = 1.0;
+    one_value.u[2].f = 1.0;
+    one_value.u[3].f = 1.0;
+    if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, one);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = sub;
+    operands[1] = zero;
+    operands[2] = one;
+    if (!(cmp = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, cmp);
+
+    return true;
+}
+
 static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 {
     struct hlsl_type *type = instr->data_type, *arg_type;
@@ -5342,6 +5413,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
         lower_ir(ctx, lower_ceil, body);
         lower_ir(ctx, lower_floor, body);
         lower_ir(ctx, lower_comparison_operators, body);
+        if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+            lower_ir(ctx, lower_slt, body);
     }
     if (profile->major_version < 2)

diff --git a/tests/hlsl/any.shader_test b/tests/hlsl/any.shader_test
index 83a1dad97..45df3bdc4 100644
--- a/tests/hlsl/any.shader_test
+++ b/tests/hlsl/any.shader_test
@@ -1,4 +1,4 @@
-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 f;

 float4 main() : sv_target
@@ -8,28 +8,28 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 1.0 1.0 1.0 1.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)

 uniform 0 float4 1.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)

 uniform 0 float4 0.0 1.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)

 uniform 0 float4 0.0 0.0 1.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)

 uniform 0 float4 0.0 0.0 0.0 1.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)

 uniform 0 float4 0.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0)

 uniform 0 float4 -1.0 -1.0 -1.0 -1.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float f;

 float4 main() : sv_target
@@ -39,13 +39,13 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 1.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)

 uniform 0 float4 0.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0)

 uniform 0 float4 -1.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)
diff --git a/tests/hlsl/expr-indexing.shader_test b/tests/hlsl/expr-indexing.shader_test
index 2aa99b40d..3dcc5727e 100644
--- a/tests/hlsl/expr-indexing.shader_test
+++ b/tests/hlsl/expr-indexing.shader_test
@@ -13,7 +13,7 @@ draw quad
 probe all rgba (8.0, 8.0, 8.0, 8.0)

-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a, b;
 float i;
@@ -26,7 +26,7 @@ float4 main() : sv_target
 uniform 0 float4 1.0 2.0 3.0 4.0
 uniform 4 float4 5.0 6.0 7.0 8.0
 uniform 8 float 2
-todo(sm<4) draw quad
+draw quad
 probe all rgba (10.0, 10.0, 10.0, 10.0)
@@ -44,7 +44,7 @@ draw quad
 probe all rgba (3.0, 3.0, 3.0, 3.0)

-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a;
 float i;
@@ -56,10 +56,10 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 1.0 2.0 3.0 4.0
 uniform 4 float 0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (4.0, 4.0, 4.0, 4.0)

 uniform 4 float 2
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)
@@ -82,7 +82,7 @@ draw quad
 probe all rgba (4.0, 4.0, 4.0, 4.0)

-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a;
 float i;
@@ -99,5 +99,5 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 1.0 2.0 3.0 4.0
 uniform 4 float 1
-todo(sm<4) draw quad
+draw quad
 probe all rgba (2.0, 2.0, 2.0, 2.0)
diff --git a/tests/hlsl/float-comparison.shader_test b/tests/hlsl/float-comparison.shader_test
index c351ac098..0e858d9f1 100644
--- a/tests/hlsl/float-comparison.shader_test
+++ b/tests/hlsl/float-comparison.shader_test
@@ -1,5 +1,5 @@
 % Check that -0.0f is not less than 0.0f
-[pixel shader todo(sm<4)]
+[pixel shader]
 float a;
 float4 main() : sv_target
@@ -9,7 +9,7 @@ float4 main() : sv_target
 [test]
 uniform 0 float 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0)
diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test
index e428d3090..27a5025c2 100644
--- a/tests/hlsl/inverse-trig.shader_test
+++ b/tests/hlsl/inverse-trig.shader_test
@@ -3,7 +3,7 @@
 % implementations. DXIL defines intrinsics for inverse trig, to be implemented
 % by the backend.

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 a;

 float4 main() : sv_target
@@ -32,7 +32,7 @@ uniform 0 float4 1.0 0.0 0.0 0.0
 todo(sm<4) draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0) 128

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 a;

 float4 main() : sv_target
diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test
index 6e2f01b7a..f17267631 100644
--- a/tests/hlsl/matrix-indexing.shader_test
+++ b/tests/hlsl/matrix-indexing.shader_test
@@ -108,7 +108,7 @@ draw quad
 probe all rgba (3.0, 4.0, 50.0, 60.0)

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float i;

 float4 main() : sv_target
@@ -120,8 +120,8 @@ float4 main() : sv_target
 [test]
 uniform 0 float 2
-todo(sm<4) draw quad
-probe all rgba (8, 9, 10, 11)
+draw quad
+todo(sm<4) probe all rgba (8, 9, 10, 11)

 [pixel shader todo(sm<4)]
diff --git a/tests/hlsl/non-const-indexing.shader_test b/tests/hlsl/non-const-indexing.shader_test
index aed33092c..d11463349 100644
--- a/tests/hlsl/non-const-indexing.shader_test
+++ b/tests/hlsl/non-const-indexing.shader_test
@@ -25,7 +25,7 @@ todo(sm<4) draw quad
 probe all rgba (9.0, 10.0, 11.0, 12.0)

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float i;

 float4 main() : SV_TARGET
@@ -36,16 +36,16 @@ float4 main() : SV_TARGET
 [test]
 uniform 0 float 0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (11.0, 11.0, 11.0, 11.0)

 uniform 0 float 1
-todo(sm<4) draw quad
+draw quad
 probe all rgba (12.0, 12.0, 12.0, 12.0)

 uniform 0 float 2
-todo(sm<4) draw quad
+draw quad
 probe all rgba (13.0, 13.0, 13.0, 13.0)

 uniform 0 float 3
-todo(sm<4) draw quad
+draw quad
 probe all rgba (14.0, 14.0, 14.0, 14.0)
diff --git a/tests/hlsl/step.shader_test b/tests/hlsl/step.shader_test
index b965f33e1..e201e15f9 100644
--- a/tests/hlsl/step.shader_test
+++ b/tests/hlsl/step.shader_test
@@ -1,4 +1,4 @@
-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 f, p;

 float4 main() : sv_target
@@ -9,7 +9,7 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 5.0 -2.6 3.0 2.0
 uniform 4 float4 1.0 -4.3 3.0 4.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 1.0, 1.0)
diff --git a/tests/hlsl/vector-indexing-uniform.shader_test b/tests/hlsl/vector-indexing-uniform.shader_test
index cd77462ec..e5ffbdd02 100644
--- a/tests/hlsl/vector-indexing-uniform.shader_test
+++ b/tests/hlsl/vector-indexing-uniform.shader_test
@@ -1,6 +1,6 @@
 % Use a uniform to prevent the compiler from optimizing.

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float i;
 float4 main() : SV_TARGET
 {
@@ -12,5 +12,5 @@ float4 main() : SV_TARGET
 [test]
 uniform 0 float 2
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.5, 0.3, 0.8, 0.2)
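
As an aside (mine, not part of the patch): the SLT->CMP equivalence that lower_slt() is built on can be checked on the host, including the -0.0f case that float-comparison.shader_test probes:

#include <assert.h>

/* Host models of the per-component d3dbc instructions. */
static float slt(float x, float y) { return x < y ? 1.0f : 0.0f; }
static float cmp(float x, float y, float z) { return x >= 0.0f ? y : z; }

int main(void)
{
    const float cases[][2] = {{1.0f, 2.0f}, {2.0f, 1.0f}, {0.0f, 0.0f}, {-0.0f, 0.0f}};
    unsigned int i;

    for (i = 0; i < sizeof(cases) / sizeof(*cases); ++i)
    {
        float x = cases[i][0], y = cases[i][1];

        /* SLT(x, y) == CMP(x - y, 0.0, 1.0), the identity lower_slt() uses. */
        assert(slt(x, y) == cmp(x - y, 0.0f, 1.0f));
    }
    return 0;
}

In the -0.0f case both sides yield 0.0f, since -0.0f - 0.0f compares greater than or equal to zero.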
From: Francisco Casas <fcasas@codeweavers.com>

Use hlsl_fixme() instead of FIXME(). Otherwise we emit invalid d3dbc.

---
 libs/vkd3d-shader/d3dbc.c              | 2 +-
 tests/hlsl/matrix-indexing.shader_test | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c
index 3a815dc57..aca0f9931 100644
--- a/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d-shader/d3dbc.c
@@ -2500,7 +2500,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *
     if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX)
     {
-        FIXME("Matrix writemasks need to be lowered.\n");
+        hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks.");
         return;
     }
diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test
index f17267631..6e2f01b7a 100644
--- a/tests/hlsl/matrix-indexing.shader_test
+++ b/tests/hlsl/matrix-indexing.shader_test
@@ -108,7 +108,7 @@ draw quad
 probe all rgba (3.0, 4.0, 50.0, 60.0)

-[pixel shader]
+[pixel shader todo(sm<4)]
 uniform float i;

 float4 main() : sv_target
@@ -120,8 +120,8 @@ float4 main() : sv_target
 [test]
 uniform 0 float 2
-draw quad
-todo(sm<4) probe all rgba (8, 9, 10, 11)
+todo(sm<4) draw quad
+probe all rgba (8, 9, 10, 11)

 [pixel shader todo(sm<4)]
From: Francisco Casas <fcasas@codeweavers.com>

---
 libs/vkd3d-shader/hlsl_codegen.c | 142 +++++++++++++++++++++----------
 1 file changed, 95 insertions(+), 47 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 3d8909713..40248bf98 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -2902,7 +2902,7 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct
     return true;
 }

-/* Use movc/cmp/slt for the ternary operator. */
+/* Use movc/cmp for the ternary operator. */
 static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 {
     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement;
@@ -2928,7 +2928,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
         return false;
     }

-    if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+    if (ctx->profile->major_version < 4)
     {
         struct hlsl_ir_node *abs, *neg;
@@ -2946,51 +2946,6 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
         if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc)))
             return false;
     }
-    else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
-    {
-        struct hlsl_ir_node *neg, *slt, *sum, *cond2, *slt_cast, *mul;
-
-        /* Expression used here is "slt(<cond>) * (first - second) + second". */
-
-        if (ctx->profile->major_version == 3)
-        {
-            if (!(cond2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc)))
-                return false;
-        }
-        else
-        {
-            if (!(cond2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, cond, cond)))
-                return false;
-        }
-        hlsl_block_add_instr(block, cond2);
-
-        if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cond2, &instr->loc)))
-            return false;
-        hlsl_block_add_instr(block, neg);
-
-        if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg, cond2)))
-            return false;
-        hlsl_block_add_instr(block, slt);
-
-        if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, second, &instr->loc)))
-            return false;
-        hlsl_block_add_instr(block, neg);
-
-        if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, first, neg)))
-            return false;
-        hlsl_block_add_instr(block, sum);
-
-        if (!(slt_cast = hlsl_new_cast(ctx, slt, sum->data_type, &instr->loc)))
-            return false;
-        hlsl_block_add_instr(block, slt_cast);
-
-        if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, slt_cast, sum)))
-            return false;
-        hlsl_block_add_instr(block, mul);
-
-        if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul, second)))
-            return false;
-    }
     else
     {
         if (cond->data_type->base_type == HLSL_TYPE_FLOAT)
@@ -3223,6 +3178,97 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h
     return true;
 }

+/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to
+ * SLT instructions (only available in vertex shaders).
+ * Based on the following equivalence:
+ *     CMP(x, y, z)
+ *     = (x >= 0) ? y : z
+ *     = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0)
+ *     = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0))
+ */
+static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
+{
+    struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add;
+    struct hlsl_constant_value zero_value, one_value;
+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
+    struct hlsl_type *float_type;
+    struct hlsl_ir_expr *expr;
+    unsigned int i;
+
+    if (instr->type != HLSL_IR_EXPR)
+        return false;
+    expr = hlsl_ir_expr(instr);
+    if (expr->op != HLSL_OP3_CMP)
+        return false;
+
+    float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+
+    for (i = 0; i < 3; ++i)
+    {
+        args[i] = expr->operands[i].node;
+
+        if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc)))
+            return false;
+        hlsl_block_add_instr(block, args_cast[i]);
+    }
+
+    memset(&zero_value, 0, sizeof(zero_value));
+    if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, zero);
+
+    one_value.u[0].f = 1.0;
+    one_value.u[1].f = 1.0;
+    one_value.u[2].f = 1.0;
+    one_value.u[3].f = 1.0;
+    if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, one);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = args_cast[0];
+    operands[1] = zero;
+    if (!(slt = hlsl_new_expr(ctx, HLSL_OP2_SLT, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, slt);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = args_cast[2];
+    operands[1] = slt;
+    if (!(mul1 = hlsl_new_expr(ctx, HLSL_OP2_MUL, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, mul1);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = slt;
+    if (!(neg_slt = hlsl_new_expr(ctx, HLSL_OP1_NEG, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, neg_slt);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = one;
+    operands[1] = neg_slt;
+    if (!(sub = hlsl_new_expr(ctx, HLSL_OP2_ADD, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, sub);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = args_cast[1];
+    operands[1] = sub;
+    if (!(mul2 = hlsl_new_expr(ctx, HLSL_OP2_MUL, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, mul2);
+
+    memset(operands, 0, sizeof(operands));
+    operands[0] = mul1;
+    operands[1] = mul2;
+    if (!(add = hlsl_new_expr(ctx, HLSL_OP2_ADD, operands, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, add);
+
+    return true;
+}
+
 static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 {
     struct hlsl_type *type = instr->data_type, *arg_type;
@@ -5415,6 +5461,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
         lower_ir(ctx, lower_comparison_operators, body);
         if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
             lower_ir(ctx, lower_slt, body);
+        else
+            lower_ir(ctx, lower_cmp, body);
     }
if (profile->major_version < 2)
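
Again as a host-side sketch (my own, not part of the patch), the expression lower_cmp() builds can be checked against a direct CMP model:

#include <assert.h>

static float slt(float x, float y) { return x < y ? 1.0f : 0.0f; }
static float cmp(float x, float y, float z) { return x >= 0.0f ? y : z; }

/* z * SLT(x, 0) + y * (1 - SLT(x, 0)), as built by lower_cmp(). */
static float lowered_cmp(float x, float y, float z)
{
    float s = slt(x, 0.0f);

    return z * s + y * (1.0f - s);
}

int main(void)
{
    assert(lowered_cmp(1.0f, 10.0f, 20.0f) == cmp(1.0f, 10.0f, 20.0f));
    assert(lowered_cmp(-1.0f, 10.0f, 20.0f) == cmp(-1.0f, 10.0f, 20.0f));
    assert(lowered_cmp(0.0f, 10.0f, 20.0f) == cmp(0.0f, 10.0f, 20.0f));
    assert(lowered_cmp(-0.0f, 10.0f, 20.0f) == cmp(-0.0f, 10.0f, 20.0f));
    return 0;
}

Note that the multiply/add form is not a true select: with non-finite y or z it can produce NaN (e.g. 0.0 * inf) where CMP would simply pick one operand.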
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl.c:
     return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
 }

+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
It doesn't look like this is used.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
+            struct hlsl_constant_value zero_value;
+
+            memset(operands, 0, sizeof(operands));
+            operands[0] = arg2_cast;
+            if (!(neg = hlsl_new_expr(ctx, HLSL_OP1_NEG, operands, float_type, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, neg);
+
+            memset(operands, 0, sizeof(operands));
+            operands[0] = arg1_cast;
+            operands[1] = neg;
+            if (!(sub = hlsl_new_expr(ctx, HLSL_OP2_ADD, operands, float_type, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, sub);
+
+            /* Use MUL as a precarious ABS. */
Why aren't you using ABS itself?
On Tue Mar 12 11:50:06 2024 +0000, Giovanni Mascellani wrote:
Why aren't you using ABS itself?
While, according to the documentation, the "abs" instruction exists for all shader profiles starting from ps_2_0 and vs_2_0, the native compiler doesn't use it directly:

* For both ps_2_0 and vs_2_0 it uses this mul trick, because we only care whether the absolute value is 0 or not.
* I checked now, and for ps_3_0 and vs_3_0 it adds the "_abs" modifier to the src registers of the comparison instruction.
So it may make sense to add a path that uses ABS when `profile->major_version` is greater than 2, similarly to what lower_ternary() does. I don't know whether this is necessary; the only case I can think of where the two differ is very large values, where the multiplication falls outside the representable range of a float. But even in those cases (which I think map to "inf"?) the sign should still be correct.

I will check what happens on Windows in these cases.
vs_2_0 uses "slt dst, -mul, mul" instead of "slt dst, 0, mul". Does this ever make a difference? Should we do it anyway just to avoid an otherwise unnecessary constant?
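
For what it's worth — my own quick check, not from the thread, and assuming the asm comparison behaves like a C float comparison — the two forms agree for every m the mul trick can produce (m = d * d is non-negative, +inf on overflow, or NaN), so the difference would seem to be only the saved 0.0 constant:

#include <assert.h>
#include <math.h>

static float slt(float a, float b) { return a < b ? 1.0f : 0.0f; }

int main(void)
{
    const float ms[] = {0.0f, 1.0f, 1e30f * 1e30f, INFINITY, NAN};
    unsigned int i;

    for (i = 0; i < sizeof(ms) / sizeof(*ms); ++i)
    {
        float m = ms[i];

        /* "slt dst, -m, m" vs. "slt dst, 0, m". */
        assert(slt(-m, m) == slt(0.0f, m));
    }
    return 0;
}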
Why does patch 5/5 not use hlsl_new_binary_expr() et al.?