-- v2: vkd3d-shader/hlsl: Parse step() intrinsic. tests: Test step() intrinsic. vkd3d-shader/hlsl: Parse sqrt() intrinsic. tests: Test sqrt() intrinsic. vkd3d-shader/hlsl: Support cos() intrinsic. vkd3d-shader/hlsl: Support sin() intrinsic. vkd3d-shader/hlsl: Add additional tests for sin() and cos().
From: Francisco Casas fcasas@codeweavers.com
--- tests/trigonometry.shader_test | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/tests/trigonometry.shader_test b/tests/trigonometry.shader_test index afc09221..fef85ca0 100644 --- a/tests/trigonometry.shader_test +++ b/tests/trigonometry.shader_test @@ -29,3 +29,31 @@ probe (12, 0) rgba (-0.53657292, 0.84385396, 0.0, 0.0) 1024 probe (13, 0) rgba ( 0.42016704, 0.90744678, 0.0, 0.0) 1024 probe (14, 0) rgba ( 0.99060736, 0.13673722, 0.0, 0.0) 1024 probe (15, 0) rgba ( 0.65028784, -0.75968791, 0.0, 0.0) 1024 + + +[pixel shader todo] +uniform float4 a; + +float4 main() : sv_target +{ + return round(1000 * sin(a)); +} + +[test] +uniform 0 float4 0.0 0.52359877 2.61799387 3.14159265 +todo draw quad +todo probe all rgba (0.0, 500.0, 500.0, 0.0) + + +[pixel shader todo] +uniform float4 a; + +float4 main() : sv_target +{ + return round(1000 * cos(a)); +} + +[test] +uniform 0 float4 0.0 0.78539816 1.57079632 2.35619449 +todo draw quad +todo probe all rgba (1000.0, 707.0, -0.0, -707.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 12 ++++++++++++ libs/vkd3d-shader/hlsl_sm4.c | 28 ++++++++++++++++++++++++++++ tests/trigonometry.shader_test | 6 +++--- 3 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 019c875c..eea1d698 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2702,6 +2702,17 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); }
+static bool intrinsic_sin(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); +} + /* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) @@ -2846,6 +2857,7 @@ intrinsic_functions[] = {"pow", 2, true, intrinsic_pow}, {"round", 1, true, intrinsic_round}, {"saturate", 1, true, intrinsic_saturate}, + {"sin", 1, true, intrinsic_sin}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"transpose", 1, true, intrinsic_transpose}, }; diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index d9d05e04..c7231826 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1319,6 +1319,29 @@ static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_ write_sm4_instruction(buffer, &instr); }
+static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); + instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; + + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { @@ -1738,6 +1761,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, &expr->node, arg1, 0); break;
+ case HLSL_OP1_SIN: + assert(type_is_float(dst_type)); + write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + break; + case HLSL_OP1_SQRT: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); diff --git a/tests/trigonometry.shader_test b/tests/trigonometry.shader_test index fef85ca0..919eea20 100644 --- a/tests/trigonometry.shader_test +++ b/tests/trigonometry.shader_test @@ -31,7 +31,7 @@ probe (14, 0) rgba ( 0.99060736, 0.13673722, 0.0, 0.0) 1024 probe (15, 0) rgba ( 0.65028784, -0.75968791, 0.0, 0.0) 1024
-[pixel shader todo] +[pixel shader] uniform float4 a;
float4 main() : sv_target @@ -41,8 +41,8 @@ float4 main() : sv_target
[test] uniform 0 float4 0.0 0.52359877 2.61799387 3.14159265 -todo draw quad -todo probe all rgba (0.0, 500.0, 500.0, 0.0) +draw quad +probe all rgba (0.0, 500.0, 500.0, 0.0)
[pixel shader todo]
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 12 ++++++++++++ libs/vkd3d-shader/hlsl_sm4.c | 5 +++++ tests/trigonometry.shader_test | 10 +++++----- 3 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index eea1d698..6beb80e4 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2387,6 +2387,17 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); }
+static bool intrinsic_cos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); +} + static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2844,6 +2855,7 @@ intrinsic_functions[] = {"abs", 1, true, intrinsic_abs}, {"asuint", -1, true, intrinsic_asuint}, {"clamp", 3, true, intrinsic_clamp}, + {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, {"dot", 2, true, intrinsic_dot}, {"floor", 1, true, intrinsic_floor}, diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index c7231826..e06f4b15 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1698,6 +1698,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_cast(ctx, buffer, expr); break;
+ case HLSL_OP1_COS: + assert(type_is_float(dst_type)); + write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + break; + case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); diff --git a/tests/trigonometry.shader_test b/tests/trigonometry.shader_test index 919eea20..09933d3b 100644 --- a/tests/trigonometry.shader_test +++ b/tests/trigonometry.shader_test @@ -4,7 +4,7 @@ void main(out float tex : texcoord, inout float4 pos : sv_position) tex = (pos.x + 1) * 320; }
-[pixel shader todo] +[pixel shader] float4 main(float tex : texcoord) : sv_target { tex = floor(tex + 0.25); @@ -12,7 +12,7 @@ float4 main(float tex : texcoord) : sv_target }
[test] -todo draw quad +draw quad probe ( 0, 0) rgba ( 0.00000000, 1.00000000, 0.0, 0.0) probe ( 1, 0) rgba ( 0.84147098, 0.54030231, 0.0, 0.0) 1024 probe ( 2, 0) rgba ( 0.90929743, -0.41614684, 0.0, 0.0) 1024 @@ -45,7 +45,7 @@ draw quad probe all rgba (0.0, 500.0, 500.0, 0.0)
-[pixel shader todo] +[pixel shader] uniform float4 a;
float4 main() : sv_target @@ -55,5 +55,5 @@ float4 main() : sv_target
[test] uniform 0 float4 0.0 0.78539816 1.57079632 2.35619449 -todo draw quad -todo probe all rgba (1000.0, 707.0, -0.0, -707.0) +draw quad +probe all rgba (1000.0, 707.0, -0.0, -707.0)
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/sqrt.shader_test | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/sqrt.shader_test
diff --git a/Makefile.am b/Makefile.am index 464b43ae..ed94f086 100644 --- a/Makefile.am +++ b/Makefile.am @@ -131,6 +131,7 @@ vkd3d_shader_tests = \ tests/sampler-offset.shader_test \ tests/saturate.shader_test \ tests/shader-interstage-interface.shader_test \ + tests/sqrt.shader_test \ tests/swizzle-0.shader_test \ tests/swizzle-1.shader_test \ tests/swizzle-2.shader_test \ diff --git a/tests/sqrt.shader_test b/tests/sqrt.shader_test new file mode 100644 index 00000000..81bf99a4 --- /dev/null +++ b/tests/sqrt.shader_test @@ -0,0 +1,12 @@ +[pixel shader todo] +uniform float4 f; + +float4 main() : sv_target +{ + return sqrt(f); +} + +[test] +uniform 0 float4 1.0 9.0 32.3 46.5 +todo draw quad +todo probe all rgba (1.0, 3.0, 5.683309, 6.819091) 1
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 12 ++++++++++++ tests/sqrt.shader_test | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 6beb80e4..f56df476 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2783,6 +2783,17 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, return true; }
+static bool intrinsic_sqrt(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, arg, loc); +} + static bool intrinsic_transpose(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2871,6 +2882,7 @@ intrinsic_functions[] = {"saturate", 1, true, intrinsic_saturate}, {"sin", 1, true, intrinsic_sin}, {"smoothstep", 3, true, intrinsic_smoothstep}, + {"sqrt", 1, true, intrinsic_sqrt}, {"transpose", 1, true, intrinsic_transpose}, };
diff --git a/tests/sqrt.shader_test b/tests/sqrt.shader_test index 81bf99a4..5d048b4f 100644 --- a/tests/sqrt.shader_test +++ b/tests/sqrt.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] uniform float4 f;
float4 main() : sv_target @@ -8,5 +8,5 @@ float4 main() : sv_target
[test] uniform 0 float4 1.0 9.0 32.3 46.5 -todo draw quad -todo probe all rgba (1.0, 3.0, 5.683309, 6.819091) 1 +draw quad +probe all rgba (1.0, 3.0, 5.683309, 6.819091) 1
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/step.shader_test | 55 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 tests/step.shader_test
diff --git a/Makefile.am b/Makefile.am index ed94f086..051d34ba 100644 --- a/Makefile.am +++ b/Makefile.am @@ -132,6 +132,7 @@ vkd3d_shader_tests = \ tests/saturate.shader_test \ tests/shader-interstage-interface.shader_test \ tests/sqrt.shader_test \ + tests/step.shader_test \ tests/swizzle-0.shader_test \ tests/swizzle-1.shader_test \ tests/swizzle-2.shader_test \ diff --git a/tests/step.shader_test b/tests/step.shader_test new file mode 100644 index 00000000..8a8b1fd2 --- /dev/null +++ b/tests/step.shader_test @@ -0,0 +1,55 @@ +[pixel shader todo] +uniform float4 f, p; + +float4 main() : sv_target +{ + return step(f, p); +} + +[test] +uniform 0 float4 5.0 -2.6 3.0 2.0 +uniform 4 float4 1.0 -4.3 3.0 4.0 +todo draw quad +todo probe all rgba (0.0, 0.0, 1.0, 1.0) + + +[pixel shader todo] +float4 main() : sv_target +{ + float2x2 a = {1, 2, 3, 4}; + float b = {3}; + + return float4(step(a, b)); +} + +[test] +todo draw quad +todo probe all rgba (1.0, 1.0, 1.0, 0.0) + + +[pixel shader fail] +float4 main() : sv_target +{ + float2x2 a = {1, 2, 3, 4}; + float1 b = {3}; + + step(a, b); + return 0; +} + + +[pixel shader todo] +float4 main() : sv_target +{ + float3x2 a = {8, 0, + 0, 8, + 8, 8}; + float2x3 b = {0, 8, 0, + 8, 0, 0}; + + return float4(step(a, b)); +} + +[test] +todo draw quad +todo probe all rgba (0.0, 1.0, 1.0, 0.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 23 +++++++++++++++++++++-- tests/step.shader_test | 18 +++++++++--------- 2 files changed, 30 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index f56df476..e38fe652 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -1486,7 +1486,7 @@ static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct l
static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - struct vkd3d_shader_location *loc) + const struct vkd3d_shader_location *loc) { struct hlsl_type *common_type, *return_type; enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); @@ -1510,7 +1510,7 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str }
static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location loc) { struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2);
@@ -2794,6 +2794,24 @@ static bool intrinsic_sqrt(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, arg, loc); }
+static bool intrinsic_step(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *ge; + struct hlsl_type *type; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + + if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, + params->args[1], params->args[0], loc))) + return false; + + type = ge->data_type; + type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); +} + static bool intrinsic_transpose(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2883,6 +2901,7 @@ intrinsic_functions[] = {"sin", 1, true, intrinsic_sin}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, + {"step", 2, true, intrinsic_step}, {"transpose", 1, true, intrinsic_transpose}, };
diff --git a/tests/step.shader_test b/tests/step.shader_test index 8a8b1fd2..e201e15f 100644 --- a/tests/step.shader_test +++ b/tests/step.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] uniform float4 f, p;
float4 main() : sv_target @@ -9,11 +9,11 @@ float4 main() : sv_target [test] uniform 0 float4 5.0 -2.6 3.0 2.0 uniform 4 float4 1.0 -4.3 3.0 4.0 -todo draw quad -todo probe all rgba (0.0, 0.0, 1.0, 1.0) +draw quad +probe all rgba (0.0, 0.0, 1.0, 1.0)
-[pixel shader todo] +[pixel shader] float4 main() : sv_target { float2x2 a = {1, 2, 3, 4}; @@ -23,8 +23,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (1.0, 1.0, 1.0, 0.0) +draw quad +probe all rgba (1.0, 1.0, 1.0, 0.0)
[pixel shader fail] @@ -38,7 +38,7 @@ float4 main() : sv_target }
-[pixel shader todo] +[pixel shader] float4 main() : sv_target { float3x2 a = {8, 0, @@ -51,5 +51,5 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.0, 1.0, 1.0, 0.0) +draw quad +probe all rgba (0.0, 1.0, 1.0, 0.0)
On Tue Jan 17 01:04:33 2023 +0000, Zebediah Figura wrote:
This fails when running under d3d9 under wine:
leslie@terabithia:~/git/vkd3d64$ make -j8 tests/shader_runner.cross64.exe && WINEDLLOVERRIDES=d3dcompiler_47=n VKD3D_TEST_PLATFORM=windows wine tests/shader_runner.cross64.exe ../vkd3d/tests/trigonometry.shader_test 0150:fixme:ntdll:NtQuerySystemInformation info_class SYSTEM_PERFORMANCE_INFORMATION shader_runner:98: Driver string: aticfx32.dll. shader_runner:99: Device: Radeon(TM) RX 460 Graphics, 1002:67ef. 0154:fixme:d3d:state_linepattern_w Setting line patterns is not supported in OpenGL core contexts. shader_runner:551:Section [test], line 42: Test failed: Got {6.55651093e-007, 4.99999940e-001, 5.00000417e-001, 8.94069672e-007}, expected {0.00000000e+000, 5.00000000e-001, 5.00000000e-001, -0.00000000e+000} at (0, 0). shader_runner:551:Section [test], line 56: Test failed: Got {1.00000000e+000, 7.07106709e-001, 8.94069672e-007, -7.07075000e-001}, expected {1.00000000e+000, 7.07107008e-001, -0.00000000e+000, -7.07107008e-001} at (0, 0). shader_runner:188: Adapter: Radeon(TM) RX 460 Graphics, 1002:67ef. shader_runner:342: Adapter: Radeon(TM) RX 460 Graphics, 1002:67ef. shader_runner: 492 tests executed (2 failures, 0 skipped, 0 todo, 0 bugs).
From testing, it looks like native will use a polynomial approximation in some cases (specifically: if more than one component is needed and the sincos() intrinsic isn't used).
I see, and the difference between `8.94069672e-007` and plain old `0.0` in terms of _ulps_ seems to be impractically high.
How about using the `round()` intrinsic, as in the update?
Worth noting: The [d3d11 functional specification](https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm) says:
`The maximum absolute error is 0.0008 in the interval from -100*Pi to +100*Pi.`
On Tue Jan 17 01:04:43 2023 +0000, Francisco Casas wrote:
I see, and the difference between `8.94069672e-007` and plain old `0.0` in terms of _ulps_ seems to be impractically high. How about using the `round()` intrinsic, as in the update? Worth noting: The [d3d11 functional specification](https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm) says:
`The maximum absolute error is 0.0008 in the interval from -100*Pi to +100*Pi.`
Seems reasonable. It would also be reasonable to just pick different constants, since it seems to be only the expected values close to zero that are broken.
This merge request was approved by Giovanni Mascellani.
This merge request was approved by Zebediah Figura.
This merge request was approved by Henri Verbeet.