Implements asin, acos, atan, and atan2.
Also includes some tests in a new test file.
One possible problem here is that I'm not sure how to determine what Microsoft's atan and atan2 return in boundary cases like atan2(1, 0). I've made the tests agree with the calculator program I've been using (Qalculate, which I assume uses libc's atan2).
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=55154
-- v11: vkd3d-shader/hlsl: Implement atan and atan2. vkd3d-shader/hlsl: Implement acos and asin trig intrinsics.
From: Petrichor Park ppark@codeweavers.com
Tests were already added in 92044d5e; this commit relaxes some of their todo markers, because the intrinsics they exercise are now implemented.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=55154 --- libs/vkd3d-shader/hlsl.y | 59 +++++++++++++++++++++++++++++ tests/hlsl/inverse-trig.shader_test | 24 ++++++------ 2 files changed, 71 insertions(+), 12 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 5f6334a4d..3605f1d90 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2660,6 +2660,57 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, params->args[0], loc); }
+static bool write_acos_or_asin(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *body; + + static const char template[] = + "%s %s(%s x)\n" + "{\n" + " %s abs_arg = abs(x);\n" + " %s poly_approx = (((-0.018729\n" + " * abs_arg + 0.074261)\n" + " * abs_arg - 0.212114)\n" + " * abs_arg + 1.570729);\n" + " %s correction = sqrt(1.0 - abs_arg);\n" + " %s zero_flip = (x < 0.0) * (-2.0 * correction * poly_approx + 3.141593);\n" + " %s result = poly_approx * correction + zero_flip;\n" + " return %s;\n" + "}"; + static const char fn_name_acos[] = "acos"; + static const char fn_name_asin[] = "asin"; + static const char return_stmt_acos[] = "result"; + static const char return_stmt_asin[] = "-result + 1.570796"; + + const char *fn_name = asin_mode + ? fn_name_asin + : fn_name_acos; + + type = params->args[0]->data_type; + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, fn_name, type->name, + type->name, type->name, type->name, type->name, type->name, + (asin_mode ? return_stmt_asin : return_stmt_acos)))) + return false; + func = hlsl_compile_internal_function(ctx, fn_name, body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + +static bool intrinsic_acos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_acos_or_asin(ctx, params, loc, false); +} + static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2737,6 +2788,12 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, return false; }
+static bool intrinsic_asin(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_acos_or_asin(ctx, params, loc, true); +} + /* Find the type corresponding to the given source type, with the same * dimensions but a different base type. */ static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, @@ -3964,9 +4021,11 @@ intrinsic_functions[] = /* Note: these entries should be kept in alphabetical order. */ {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, {"abs", 1, true, intrinsic_abs}, + {"acos", 1, true, intrinsic_acos}, {"all", 1, true, intrinsic_all}, {"any", 1, true, intrinsic_any}, {"asfloat", 1, true, intrinsic_asfloat}, + {"asin", 1, true, intrinsic_asin}, {"asuint", -1, true, intrinsic_asuint}, {"ceil", 1, true, intrinsic_ceil}, {"clamp", 3, true, intrinsic_clamp}, diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test index 0c1fdc01f..01344204b 100644 --- a/tests/hlsl/inverse-trig.shader_test +++ b/tests/hlsl/inverse-trig.shader_test @@ -3,7 +3,7 @@ % implementations. DXIL defines intrinsics for inverse trig, to be implemented % by the backend.
-[pixel shader todo] +[pixel shader todo(sm<4)] uniform float4 a;
float4 main() : sv_target @@ -13,26 +13,26 @@ float4 main() : sv_target
[test] uniform 0 float4 -1.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (3.14159274, 0.0, 0.0, 0.0) 128
uniform 0 float4 -0.5 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (2.094441441, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (1.57072878, 0.0, 0.0, 0.0) 1024
uniform 0 float4 0.5 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (1.04715133, 0.0, 0.0, 0.0) 512
uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) 128
-[pixel shader todo] +[pixel shader todo(sm<4)] uniform float4 a;
float4 main() : sv_target @@ -44,7 +44,7 @@ float4 main() : sv_target
[test] uniform 0 float4 -1.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (-31416.0, 0.0, 0.0, 0.0)
[require] @@ -52,15 +52,15 @@ shader model < 6.0
[test] uniform 0 float4 -0.5 0.0 0.0 0.0 -todo draw quad +todo(sm<4) draw quad probe all rgba (-10473.0, 0.0, 0.0, 0.0)
uniform 0 float4 0.0 0.0 0.0 0.0 -todo draw quad +todo(sm<4) draw quad probe all rgba (1.0, 0.0, 0.0, 0.0)
uniform 0 float4 0.5 0.0 0.0 0.0 -todo draw quad +todo(sm<4) draw quad probe all rgba (10473.0, 0.0, 0.0, 0.0)
[require] @@ -88,7 +88,7 @@ probe all rgba (10472.0, 0.0, 0.0, 0.0) 4096
[test] uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (31416.0, 0.0, 0.0, 0.0)
From: Petrichor Park ppark@codeweavers.com
Also loosens some more todos on the tests. --- libs/vkd3d-shader/hlsl.y | 91 +++++++++++++++++++++++++++++ tests/hlsl/inverse-trig.shader_test | 44 +++++++------- 2 files changed, 113 insertions(+), 22 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 3605f1d90..7e478408d 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2794,6 +2794,95 @@ static bool intrinsic_asin(struct hlsl_ctx *ctx, return write_acos_or_asin(ctx, params, loc, true); }
+static bool write_atan_or_atan2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, + const struct vkd3d_shader_location *loc, bool atan2_mode) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + struct vkd3d_string_buffer *buf; + int ok; + + static const char* atan2_name = "atan2"; + static const char* atan_name = "atan"; + + static const char* atan2_header_template = + "%s atan2(%s y, %s x)\n" + "{\n" + " %s in_y, in_x;\n" + " in_y = y;\n" + " in_x = x;\n"; + static const char* atan_header_template = + "%s atan(%s y)\n" + "{\n" + " %s in_y, in_x;\n" + " in_y = y;\n" + " in_x = 1.0;\n"; + + static const char body_template[] = + " %s recip, input, x2, poly_approx, flipped;" + " recip = 1.0 / max(abs(in_y), abs(in_x));\n" + " input = recip * min(abs(in_y), abs(in_x));\n" + " x2 = input * input;\n" + " poly_approx = ((((0.020835\n" + " * x2 - 0.085133)\n" + " * x2 + 0.180141)\n" + " * x2 - 0.330299)\n" + " * x2 + 0.999866)\n" + " * input;\n" + " flipped = poly_approx * -2.0 + 1.570796;\n" + " poly_approx += abs(in_x) < abs(in_y) ? flipped : 0.0;\n" + " poly_approx += in_x < 0.0 ? -3.1415927 : 0.0;\n" + " return (min(in_x, in_y) < 0.0 && max(in_x, in_y) >= 0.0)\n" + " ? -poly_approx\n" + " : poly_approx;\n" + "}"; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + + if (!(buf = hlsl_get_string_buffer(ctx))) { + return false; + } + + if (atan2_mode) { + ok = vkd3d_string_buffer_printf(buf, atan2_header_template, + type->name, type->name, type->name, type->name); + } else { + ok = vkd3d_string_buffer_printf(buf, atan_header_template, + type->name, type->name, type->name); + } + if (ok < 0) + return false; + + ok = vkd3d_string_buffer_printf(buf, body_template, type->name); + if (ok < 0) + return false; + + func = hlsl_compile_internal_function(ctx, + atan2_mode ? 
atan2_name : atan_name, buf->buffer); + vkd3d_string_buffer_cleanup(buf); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + +static bool intrinsic_atan(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_atan_or_atan2(ctx, params, loc, false); +} + + +static bool intrinsic_atan2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_atan_or_atan2(ctx, params, loc, true); +} + + /* Find the type corresponding to the given source type, with the same * dimensions but a different base type. */ static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, @@ -4027,6 +4116,8 @@ intrinsic_functions[] = {"asfloat", 1, true, intrinsic_asfloat}, {"asin", 1, true, intrinsic_asin}, {"asuint", -1, true, intrinsic_asuint}, + {"atan", 1, true, intrinsic_atan}, + {"atan2", 2, true, intrinsic_atan2}, {"ceil", 1, true, intrinsic_ceil}, {"clamp", 3, true, intrinsic_clamp}, {"clip", 1, true, intrinsic_clip}, diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test index 01344204b..e428d3090 100644 --- a/tests/hlsl/inverse-trig.shader_test +++ b/tests/hlsl/inverse-trig.shader_test @@ -92,7 +92,7 @@ todo(sm<4) draw quad probe all rgba (31416.0, 0.0, 0.0, 0.0)
-[pixel shader todo] +[pixel shader todo(sm<4)] uniform float4 a;
float4 main() : sv_target @@ -102,26 +102,26 @@ float4 main() : sv_target
[test] uniform 0 float4 -1.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (-0.785409629, 0.0, 0.0, 0.0) 512
uniform 0 float4 -0.5 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (-0.4636476, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.5 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (0.4636476, 0.0, 0.0, 0.0) 256
uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (0.785409629, 0.0, 0.0, 0.0) 512
-[pixel shader todo] +[pixel shader todo(sm<4)] uniform float4 a;
float4 main() : sv_target @@ -133,64 +133,64 @@ float4 main() : sv_target [test] % Non-degenerate cases uniform 0 float4 1.0 1.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (0.785385, 0.0, 0.0, 0.0) 512
uniform 0 float4 5.0 -5.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (2.356194, 0.0, 0.0, 0.0) 256
uniform 0 float4 -3.0 -3.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (-2.356194, 0.0, 0.0, 0.0) 256
uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 -1.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (-1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 1.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 -1.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256
% Degenerate cases uniform 0 float4 0.00001 0.00002 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (0.463647, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.00001 -0.00002 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (2.677945, 0.0, 0.0, 0.0) 256
uniform 0 float4 -0.00001 100000.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (-0.000000000099986595, 0.0, 0.0, 0.0) 2048
uniform 0 float4 10000000.0 0.00000001 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
% Negative zero behavior should be to treat it the % same as normal zero. uniform 0 float4 1000000000.0 0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 1000000000.0 -0.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 -1.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256
uniform 0 float4 -0.0 -1.0 0.0 0.0 -todo(sm<6) draw quad +todo(sm<4) draw quad probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256