Implements asin, acos, atan, and atan2.
Also includes some tests in a new test file.
One possible problem here is that I'm not sure how to test what Microsoft's atan and atan2 outputs are in boundary cases like atan2(1, 0). I've made the test suites adhere with the calculator program I've been using (Qalculate, which I assume is using libc's atan2).
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=55154
-- v7: vkd3d-shader/hlsl: Implement atan and atan2. vkd3d-shader/hlsl: Implement acos and asin trig intrinsics.
From: Petrichor Park ppark@codeweavers.com
Also includes some tests in a new test file.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=55154 --- Makefile.am | 1 + libs/vkd3d-shader/hlsl.y | 59 ++++++++++++++++++++++++++ tests/hlsl/inverse-trig.shader_test | 64 +++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 tests/hlsl/inverse-trig.shader_test
diff --git a/Makefile.am b/Makefile.am index bc648b631..f45d16196 100644 --- a/Makefile.am +++ b/Makefile.am @@ -117,6 +117,7 @@ vkd3d_shader_tests = \ tests/hlsl/initializer-struct.shader_test \ tests/hlsl/intrinsic-override.shader_test \ tests/hlsl/invalid.shader_test \ + tests/hlsl/inverse-trig.shader_test \ tests/hlsl/is-front-face.shader_test \ tests/hlsl/ldexp.shader_test \ tests/hlsl/length.shader_test \ diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 7424e63a4..cce353946 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2614,6 +2614,57 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, params->args[0], loc); }
+static bool write_acos_or_asin(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *body; + + static const char template[] = + "%s %s(%s x)\n" + "{\n" + " %s abs_arg = abs(x);\n" + " %s poly_approx = (((-0.018729\n" + " * abs_arg + 0.074261)\n" + " * abs_arg - 0.212114)\n" + " * abs_arg + 1.570729);\n" + " %s correction = sqrt(1.0 - abs_arg);\n" + " %s zero_flip = (x < 0.0) * (-2.0 * correction * poly_approx + 3.141593);\n" + " %s result = poly_approx * correction + zero_flip;\n" + " return %s;\n" + "}"; + static const char fn_name_acos[] = "acos"; + static const char fn_name_asin[] = "asin"; + static const char return_stmt_acos[] = "result"; + static const char return_stmt_asin[] = "-result + 1.570796"; + + const char *fn_name = asin_mode + ? fn_name_asin + : fn_name_acos; + + type = params->args[0]->data_type; + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, fn_name, type->name, + type->name, type->name, type->name, type->name, type->name, + (asin_mode ? return_stmt_asin : return_stmt_acos)))) + return false; + func = hlsl_compile_internal_function(ctx, fn_name, body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + +static bool intrinsic_acos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_acos_or_asin(ctx, params, loc, false); +} + static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2691,6 +2742,12 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, return false; }
+static bool intrinsic_asin(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_acos_or_asin(ctx, params, loc, true); +} + /* Find the type corresponding to the given source type, with the same * dimensions but a different base type. */ static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, @@ -3918,9 +3975,11 @@ intrinsic_functions[] = /* Note: these entries should be kept in alphabetical order. */ {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, {"abs", 1, true, intrinsic_abs}, + {"acos", 1, true, intrinsic_acos}, {"all", 1, true, intrinsic_all}, {"any", 1, true, intrinsic_any}, {"asfloat", 1, true, intrinsic_asfloat}, + {"asin", 1, true, intrinsic_asin}, {"asuint", -1, true, intrinsic_asuint}, {"ceil", 1, true, intrinsic_ceil}, {"clamp", 3, true, intrinsic_clamp}, diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test new file mode 100644 index 000000000..332db344f --- /dev/null +++ b/tests/hlsl/inverse-trig.shader_test @@ -0,0 +1,64 @@ +% Microsoft natively outputs values that are slightly mathematically wrong. +% VKD3D faithfully does the same. +[pixel shader] +uniform float4 a; + +float4 main() : sv_target +{ + return float4(acos(a.x), 0.0, 0.0, 0.0); +} + +[test] +uniform 0 float4 -1.0 0.0 0.0 0.0 +draw quad +probe all rgba (3.14159274, 0.0, 0.0, 0.0) 128 + +uniform 0 float4 -0.5 0.0 0.0 0.0 +draw quad +probe all rgba (2.094441441, 0.0, 0.0, 0.0) 128 + +uniform 0 float4 0.0 0.0 0.0 0.0 +draw quad +probe all rgba (1.57072878, 0.0, 0.0, 0.0) 128 + +uniform 0 float4 0.5 0.0 0.0 0.0 +draw quad +probe all rgba (1.04715133, 0.0, 0.0, 0.0) 128 + +uniform 0 float4 1.0 0.0 0.0 0.0 +draw quad +probe all rgba (0.0, 0.0, 0.0, 0.0) 128 + +[pixel shader] +uniform float4 a; + +float4 main() : sv_target +{ + return float4(asin(a.x), 0.0, 0.0, 0.0); +} + +[test] +uniform 0 float4 -1.0 0.0 0.0 0.0 +draw quad +probe all rgba (-1.57079637, 0.0, 0.0, 0.0) 128 + +uniform 0 float4 -0.5 0.0 0.0 0.0 +draw quad +probe all rgba (-0.523645043, 0.0, 0.0, 0.0) 128 + +% Because sqrt isn't identical across platforms, there is some inaccuracy +% here even with an identical algorithm, and because it's so near zero, +% each ulp is really small. So, in order to pass there needs to be this +% enormous margin. +uniform 0 float4 0.0 0.0 0.0 0.0 +draw quad +probe all rgba (0.0000675916672, 0.0, 0.0, 0.0) 131072 + +uniform 0 float4 0.5 0.0 0.0 0.0 +draw quad +probe all rgba (0.523645043, 0.0, 0.0, 0.0) 128 + +uniform 0 float4 1.0 0.0 0.0 0.0 +draw quad +probe all rgba (1.57079637, 0.0, 0.0, 0.0) 128 +
From: Petrichor Park ppark@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 90 +++++++++++++++++++++++ tests/hlsl/inverse-trig.shader_test | 106 ++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index cce353946..957576d77 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2748,6 +2748,94 @@ static bool intrinsic_asin(struct hlsl_ctx *ctx, return write_acos_or_asin(ctx, params, loc, true); }
+static bool write_atan_or_atan2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, + const struct vkd3d_shader_location *loc, bool atan2_mode) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *header; + char *body; + char *whole_fn; + + static const char* atan2_name = "atan2"; + static const char* atan_name = "atan"; + + static const char* atan2_header_template = + "%s atan2(%s y, %s x)\n" + "{\n" + " %s in_y = y;\n" + " %s in_x = x;\n"; + static const char* atan_header_template = + "%s atan(%s y)\n" + "{\n" + " %s in_y = y;\n" + " %s in_x = 1.0;\n"; + + static const char body_template[] = + " %s recip = 1.0 / max(abs(in_y), abs(in_x));\n" + " %s input = recip * min(abs(in_y), abs(in_x));\n" + " %s x2 = input * input;\n" + " %s poly_approx = ((((0.020835\n" + " * x2 - 0.085133)\n" + " * x2 + 0.180141)\n" + " * x2 - 0.330299)\n" + " * x2 + 0.999866)\n" + " * input;\n" + " %s flipped = poly_approx * -2.0 + 1.570796;\n" + " poly_approx += abs(in_x) < abs(in_y) ? flipped : 0.0;\n" + " poly_approx += in_x < 0.0 ? -3.1415927 : 0.0;\n" + " return (min(in_x, in_y) < 0.0 && max(in_x, in_y) >= 0.0)\n" + " ? -poly_approx\n" + " : poly_approx;\n" + "}"; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + + header = atan2_mode + ? hlsl_sprintf_alloc(ctx, atan2_header_template, + type->name, type->name, type->name, + type->name, type->name) + : hlsl_sprintf_alloc(ctx, atan_header_template, + type->name, type->name, + type->name, type->name); + if (!header) + return false; + + if (!(body = hlsl_sprintf_alloc(ctx, body_template, + type->name, type->name, type->name, type->name, type->name))) + return false; + + if (!(whole_fn = hlsl_sprintf_alloc(ctx, "%s%s", + header, body))) + return false; + vkd3d_free(header); + vkd3d_free(body); + + func = hlsl_compile_internal_function(ctx, atan2_mode ? atan2_name : atan_name, whole_fn); + vkd3d_free(whole_fn); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + +static bool intrinsic_atan(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_atan_or_atan2(ctx, params, loc, false); +} + + +static bool intrinsic_atan2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_atan_or_atan2(ctx, params, loc, true); +} + + /* Find the type corresponding to the given source type, with the same * dimensions but a different base type. */ static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, @@ -3981,6 +4069,8 @@ intrinsic_functions[] = {"asfloat", 1, true, intrinsic_asfloat}, {"asin", 1, true, intrinsic_asin}, {"asuint", -1, true, intrinsic_asuint}, + {"atan", 1, true, intrinsic_atan}, + {"atan2", 2, true, intrinsic_atan2}, {"ceil", 1, true, intrinsic_ceil}, {"clamp", 3, true, intrinsic_clamp}, {"clip", 1, true, intrinsic_clip}, diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test index 332db344f..914e10d23 100644 --- a/tests/hlsl/inverse-trig.shader_test +++ b/tests/hlsl/inverse-trig.shader_test @@ -62,3 +62,109 @@ uniform 0 float4 1.0 0.0 0.0 0.0 draw quad probe all rgba (1.57079637, 0.0, 0.0, 0.0) 128
+ +[pixel shader] +uniform float4 a; + +float4 main() : sv_target +{ + return float4(atan(a.x), 0.0, 0.0, 0.0); +} + +[test] +uniform 0 float4 -1.0 0.0 0.0 0.0 +draw quad +probe all rgba (-0.785409629, 0.0, 0.0, 0.0) 512 + +uniform 0 float4 -0.5 0.0 0.0 0.0 +draw quad +probe all rgba (-0.4636476, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 0.0 0.0 0.0 0.0 +draw quad +probe all rgba (0.0, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 0.5 0.0 0.0 0.0 +draw quad +probe all rgba (0.4636476, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 1.0 0.0 0.0 0.0 +draw quad +probe all rgba (0.785409629, 0.0, 0.0, 0.0) 512 + +[pixel shader] +uniform float4 a; + +float4 main() : sv_target +{ + // Because the argument order is (y,x) the numbers are + // passed in "backwards" here, so they're the right way in the + // test cases. + return float4(atan2(a.x, a.y), 0.0, 0.0, 0.0); +} + +[test] +% Non-degenerate cases +uniform 0 float4 1.0 1.0 0.0 0.0 +draw quad +probe all rgba (0.785385, 0.0, 0.0, 0.0) 512 + +uniform 0 float4 5.0 -5.0 0.0 0.0 +draw quad +probe all rgba (2.356194, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 -3.0 -3.0 0.0 0.0 +draw quad +probe all rgba (-2.356194, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 1.0 0.0 0.0 0.0 +draw quad +probe all rgba (1.570796, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 -1.0 0.0 0.0 0.0 +draw quad +probe all rgba (-1.570796, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 0.0 1.0 0.0 0.0 +draw quad +probe all rgba (0.0, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 0.0 -1.0 0.0 0.0 +draw quad +probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256 + +% Degenerate cases +uniform 0 float4 0.00001 0.00002 0.0 0.0 +draw quad +probe all rgba (0.463647, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 0.00001 -0.00002 0.0 0.0 +draw quad +probe all rgba (2.677945, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 -0.00001 100000.0 0.0 0.0 +draw quad +probe all rgba (-0.000000000099986595, 0.0, 0.0, 0.0) 1024 + +uniform 0 float4 10000000.0 0.00000001 0.0 0.0 +draw quad +probe all rgba (1.570796, 0.0, 0.0, 0.0) 256 + +% Negative zero behavior should be to treat it the +% same as normal zero. +uniform 0 float4 1000000000.0 0.0 0.0 0.0 +draw quad +probe all rgba (1.570796, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 1000000000.0 -0.0 0.0 0.0 +draw quad +probe all rgba (1.570796, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 0.0 -1.0 0.0 0.0 +draw quad +probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256 + +uniform 0 float4 -0.0 -1.0 0.0 0.0 +draw quad +probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256 +
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl.y:
const struct parse_initializer *params,
const struct vkd3d_shader_location *loc, bool atan2_mode)
+{
- struct hlsl_ir_function_decl *func;
- struct hlsl_type *type;
- char *header;
- char *body;
- char *whole_fn;
- static const char* atan2_name = "atan2";
- static const char* atan_name = "atan";
- static const char* atan2_header_template =
"%s atan2(%s y, %s x)\n"
"{\n"
" %s in_y = y;\n"
That's fine for me, but notice that if you want to save some typing `%s` and `type->name` you can move all the declarations at the beginning: ```hlsl %s in_y, in_x, recip, input, x2, poly_approx, flipped; ```
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl.y:
? hlsl_sprintf_alloc(ctx, atan2_header_template,
type->name, type->name, type->name,
type->name, type->name)
: hlsl_sprintf_alloc(ctx, atan_header_template,
type->name, type->name,
type->name, type->name);
- if (!header)
return false;
- if (!(body = hlsl_sprintf_alloc(ctx, body_template,
type->name, type->name, type->name, type->name, type->name)))
return false;
- if (!(whole_fn = hlsl_sprintf_alloc(ctx, "%s%s",
header, body)))
return false;
This is not wrong either, but we have a better tool for that, which is `vkd3d_string_buffer_printf()` (and companions).
Also, notice that the `if` continuation line should be indented 8 spaces, not 4.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl.y:
" return (min(in_x, in_y) < 0.0 && max(in_x, in_y) >= 0.0)\n"
" ? -poly_approx\n"
" : poly_approx;\n"
"}";
- if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
return false;
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
- header = atan2_mode
? hlsl_sprintf_alloc(ctx, atan2_header_template,
type->name, type->name, type->name,
type->name, type->name)
: hlsl_sprintf_alloc(ctx, atan_header_template,
type->name, type->name,
type->name, type->name);
I don't know if we have specific conventions for this construct, but in general continuation lines are expected to be indented 8 spaces. Though I would say that here it even makes sense to use a regular `if` construct, and my suggestion above might make everything shorter anyway.
The latest revision feels pretty good, I have a few more comments, but mostly nitpicks (and then Henri's suggestion: without trying it seems that you just need to stick a `todo(sm>=6)` in front of each `draw quad` line).
Sorry, I pushed this just as a rebase onto master, there's no changes. But thanks for the feedback!