[PATCH v7 1/2] vkd3d-shader/hlsl: Implement acos and asin trig intrinsics.

15 Jan 2024

From: Petrichor Park ppark@codeweavers.com
Also includes some tests in a new test file.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=55154
---
 Makefile.am                         |  1 +
 libs/vkd3d-shader/hlsl.y            | 59 ++++++++++++++++++++++++++
 tests/hlsl/inverse-trig.shader_test | 64 +++++++++++++++++++++++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 tests/hlsl/inverse-trig.shader_test

diff --git a/Makefile.am b/Makefile.am
index bc648b631..f45d16196 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -117,6 +117,7 @@ vkd3d_shader_tests = \
    tests/hlsl/initializer-struct.shader_test \
    tests/hlsl/intrinsic-override.shader_test \
    tests/hlsl/invalid.shader_test \
+	tests/hlsl/inverse-trig.shader_test \
    tests/hlsl/is-front-face.shader_test \
    tests/hlsl/ldexp.shader_test \
    tests/hlsl/length.shader_test \
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
index 7424e63a4..cce353946 100644
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -2614,6 +2614,57 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx,
     return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, params->args[0], loc);
 }
 
+static bool write_acos_or_asin(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode)
+{
+    struct hlsl_ir_function_decl *func;
+    struct hlsl_type *type;
+    char *body;
+    /* Shared backend of acos() and asin(): formats an HLSL helper from the template, compiles it and calls it. */
+    static const char template[] = /* The %s slots are filled in by the hlsl_sprintf_alloc() call below. */
+            "%s %s(%s x)\n"
+            "{\n"
+            "    %s abs_arg = abs(x);\n"
+            "    %s poly_approx = (((-0.018729\n"
+            "        * abs_arg + 0.074261)\n"
+            "        * abs_arg - 0.212114)\n"
+            "        * abs_arg + 1.570729);\n"
+            "    %s correction = sqrt(1.0 - abs_arg);\n"
+            "    %s zero_flip = (x < 0.0) * (-2.0 * correction * poly_approx + 3.141593);\n"
+            "    %s result = poly_approx * correction + zero_flip;\n" /* x < 0: 3.141593 - poly_approx * correction */
+            "    return %s;\n"
+            "}";
+    static const char fn_name_acos[] = "acos";
+    static const char fn_name_asin[] = "asin";
+    static const char return_stmt_acos[] = "result";
+    static const char return_stmt_asin[] = "-result + 1.570796"; /* asin(x) = pi/2 - acos(x) */
+    /* Name given to the generated helper function. */
+    const char *fn_name = asin_mode
+        ? fn_name_asin
+        : fn_name_acos;
+    /* Declare the helper using the float type with the argument's class and dimensions. */
+    type = params->args[0]->data_type;
+    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
+    /* Format arguments: return type, name, parameter type, five local types, returned expression. */
+    if (!(body = hlsl_sprintf_alloc(ctx, template,
+            type->name, fn_name, type->name,
+            type->name, type->name, type->name, type->name, type->name,
+            (asin_mode ? return_stmt_asin : return_stmt_acos))))
+        return false;
+    func = hlsl_compile_internal_function(ctx, fn_name, body);
+    vkd3d_free(body); /* The formatted source is not used after compilation. */
+    if (!func)
+        return false;
+    /* Emit a call to the compiled helper with the caller's arguments. */
+    return add_user_call(ctx, func, params, loc);
+}
+
+static bool intrinsic_acos(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    return write_acos_or_asin(ctx, params, loc, false); /* asin_mode = false: handle acos(). */
+}
+
 static bool intrinsic_all(struct hlsl_ctx *ctx,
         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -2691,6 +2742,12 @@ static bool intrinsic_any(struct hlsl_ctx *ctx,
     return false;
 }
 
+static bool intrinsic_asin(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    return write_acos_or_asin(ctx, params, loc, true); /* asin_mode = true: handle asin(). */
+}
+
 /* Find the type corresponding to the given source type, with the same
  * dimensions but a different base type. */
 static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx,
@@ -3918,9 +3975,11 @@ intrinsic_functions[] =
     /* Note: these entries should be kept in alphabetical order. */
     {"D3DCOLORtoUBYTE4",                    1, true,  intrinsic_d3dcolor_to_ubyte4},
     {"abs",                                 1, true,  intrinsic_abs},
+    {"acos",                                1, true,  intrinsic_acos},
     {"all",                                 1, true,  intrinsic_all},
     {"any",                                 1, true,  intrinsic_any},
     {"asfloat",                             1, true,  intrinsic_asfloat},
+    {"asin",                                1, true,  intrinsic_asin},
     {"asuint",                             -1, true,  intrinsic_asuint},
     {"ceil",                                1, true,  intrinsic_ceil},
     {"clamp",                               3, true,  intrinsic_clamp},
diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test
new file mode 100644
index 000000000..332db344f
--- /dev/null
+++ b/tests/hlsl/inverse-trig.shader_test
@@ -0,0 +1,64 @@
+% The native Microsoft implementation returns values that differ slightly from
+% the mathematically exact results. vkd3d deliberately reproduces those values.
+[pixel shader]
+uniform float4 a;
+
+float4 main() : sv_target
+{
+    return float4(acos(a.x), 0.0, 0.0, 0.0);
+}
+
+[test]
+uniform 0 float4 -1.0 0.0 0.0 0.0
+draw quad
+probe all rgba (3.14159274, 0.0, 0.0, 0.0) 128
+
+uniform 0 float4 -0.5 0.0 0.0 0.0
+draw quad
+probe all rgba (2.094441441, 0.0, 0.0, 0.0) 128
+
+uniform 0 float4 0.0 0.0 0.0 0.0
+draw quad
+probe all rgba (1.57072878, 0.0, 0.0, 0.0) 128
+
+uniform 0 float4 0.5 0.0 0.0 0.0
+draw quad
+probe all rgba (1.04715133, 0.0, 0.0, 0.0) 128
+
+uniform 0 float4 1.0 0.0 0.0 0.0
+draw quad
+probe all rgba (0.0, 0.0, 0.0, 0.0) 128
+
+[pixel shader]
+uniform float4 a;
+
+float4 main() : sv_target
+{
+    return float4(asin(a.x), 0.0, 0.0, 0.0);
+}
+
+[test]
+uniform 0 float4 -1.0 0.0 0.0 0.0
+draw quad
+probe all rgba (-1.57079637, 0.0, 0.0, 0.0) 128
+
+uniform 0 float4 -0.5 0.0 0.0 0.0
+draw quad
+probe all rgba (-0.523645043, 0.0, 0.0, 0.0) 128
+
+% sqrt is not bit-identical across platforms, so results differ slightly
+% even with an identical algorithm. The expected value here is so close to
+% zero that each ULP is tiny, so the probe needs an enormous ULP tolerance
+% in order to pass.
+uniform 0 float4 0.0 0.0 0.0 0.0
+draw quad
+probe all rgba (0.0000675916672, 0.0, 0.0, 0.0) 131072
+
+uniform 0 float4 0.5 0.0 0.0 0.0
+draw quad
+probe all rgba (0.523645043, 0.0, 0.0, 0.0) 128
+
+uniform 0 float4 1.0 0.0 0.0 0.0
+draw quad
+probe all rgba (1.57079637, 0.0, 0.0, 0.0) 128
+
-- 
GitLab


https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/364

    

2025

2024

2023

2022

[PATCH v7 1/2] vkd3d-shader/hlsl: Implement acos and asin trig intrinsics.