I wanted to use ._xy element access to make it shorter, but that attempt was crushed by reality - such indexing apparently compiles but does not produce correct element access loads. I'm going to update this once that is fixed.
-- v5: vkd3d-shader/hlsl: Add determinant() function.
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- Makefile.am | 1 + libs/vkd3d-shader/hlsl.y | 62 +++++++++++++++ tests/hlsl/determinant.shader_test | 116 +++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 tests/hlsl/determinant.shader_test
diff --git a/Makefile.am b/Makefile.am index b36358b2..eebc359e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -78,6 +78,7 @@ vkd3d_shader_tests = \ tests/hlsl/cross.shader_test \ tests/hlsl/d3dcolor-to-ubyte4.shader_test \ tests/hlsl/ddxddy.shader_test \ + tests/hlsl/determinant.shader_test \ tests/hlsl/discard.shader_test \ tests/hlsl/distance.shader_test \ tests/hlsl/dot.shader_test \ diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 67fd9f6f..fd1d4630 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2839,6 +2839,67 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); }
+static bool intrinsic_determinant(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + static const char determinant2x2[] = + "float determinant(float2x2 m)\n" + "{\n" + " return m[0][0] * m[1][1] - m[0][1] * m[1][0];\n" + "}"; + static const char determinant3x3[] = + "float determinant(float3x3 m)\n" + "{\n" + " float2x2 m1 = { m[1][1], m[1][2], m[2][1], m[2][2] };\n" + " float2x2 m2 = { m[1][0], m[1][2], m[2][0], m[2][2] };\n" + " float2x2 m3 = { m[1][0], m[1][1], m[2][0], m[2][1] };\n" + " float3 v1 = { m[0][0], -m[0][1], m[0][2] };\n" + " float3 v2 = { determinant(m1), determinant(m2), determinant(m3) };\n" + " return dot(v1, v2);\n" + "}"; + static const char determinant4x4[] = + "float determinant(float4x4 m)\n" + "{\n" + " float3x3 m1 = { m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3] };\n" + " float3x3 m2 = { m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3] };\n" + " float3x3 m3 = { m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3] };\n" + " float3x3 m4 = { m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2] };\n" + " float4 v1 = { m[0][0], -m[0][1], m[0][2], -m[0][3] };\n" + " float4 v2 = { determinant(m1), determinant(m2), determinant(m3), determinant(m4) };\n" + " return dot(v1, v2);\n" + "}"; + static const char *bodies[] = + { + [2] = determinant2x2, + [3] = determinant3x3, + [4] = determinant4x4, + }; + + struct hlsl_ir_node *arg = params->args[0]; + const struct hlsl_type *type = arg->data_type; + struct hlsl_ir_function_decl *func; + unsigned int dim; + + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_MATRIX) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); + return false; + } + + dim = min(type->dimx, type->dimy); + if (dim == 1) + { + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return 
false; + return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); + } + + if (!(func = hlsl_compile_internal_function(ctx, "determinant", bodies[dim]))) + return false; + + return add_user_call(ctx, func, params, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3634,6 +3695,7 @@ intrinsic_functions[] = {"ddy", 1, true, intrinsic_ddy}, {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, {"ddy_fine", 1, true, intrinsic_ddy_fine}, + {"determinant", 1, true, intrinsic_determinant}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, diff --git a/tests/hlsl/determinant.shader_test b/tests/hlsl/determinant.shader_test new file mode 100644 index 00000000..d7dcaa56 --- /dev/null +++ b/tests/hlsl/determinant.shader_test @@ -0,0 +1,116 @@ +[pixel shader] +float s; + +float4 main() : sv_target +{ + return determinant(s); +} + +[test] +uniform 0 float4 9.0 2.0 3.0 4.0 +draw quad +probe all rgba (9.0, 9.0, 9.0, 9.0) + +[pixel shader] +float1x1 m; + +float4 main() : sv_target +{ + return determinant(m); +} + +[test] +uniform 0 float4 1.0 2.0 3.0 4.0 +draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0) + +[pixel shader] +float2x2 m; + +float4 main() : sv_target +{ + return determinant(m); +} + +[test] +uniform 0 float4 1.0 2.0 3.0 4.0 +uniform 4 float4 5.0 6.0 7.0 8.0 +draw quad +probe all rgba (-4.0, -4.0, -4.0, -4.0) + +[pixel shader] +float2x1 m; + +float4 main() : sv_target +{ + return determinant(m); +} + +[test] +uniform 0 float4 1.0 2.0 3.0 4.0 +uniform 4 float4 5.0 6.0 7.0 8.0 +draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0) + +[pixel shader] +float3x3 m; + +float4 main() : sv_target +{ + return determinant(m); +} + +[test] +uniform 0 float4 1.0 2.0 3.0 4.0 +uniform 4 float4 5.0 -6.0 7.0 8.0 +uniform 8 float4 9.0 10.0 11.0 12.0 +draw quad +probe all rgba (192.0, 192.0, 192.0, 192.0) + +[pixel shader] +float4x4 m; + 
+float4 main() : sv_target +{ + return determinant(m); +} + +[test] +uniform 0 float4 1.0 -2.0 3.0 4.0 +uniform 4 float4 5.0 6.0 -7.0 8.0 +uniform 8 float4 9.0 10.0 11.0 12.0 +uniform 12 float4 13.0 14.0 15.0 16.0 +draw quad +probe all rgba (-672.0, -672.0, -672.0, -672.0) + +[pixel shader] +float3x4 m; + +float4 main() : sv_target +{ + return determinant(m); +} + +[test] +uniform 0 float4 1.0 2.0 3.0 0.0 +uniform 4 float4 5.0 -6.0 7.0 0.0 +uniform 8 float4 9.0 10.0 11.0 0.0 +uniform 12 float4 0.0 0.0 0.0 0.0 +draw quad +probe all rgba (192.0, 192.0, 192.0, 192.0) + +[pixel shader fail] +float1 v; + +float4 main() : sv_target +{ + return determinant(v); +} + +[pixel shader fail] +float2 v; + +float4 main() : sv_target +{ + return determinant(v); +}
On Sat Sep 9 13:41:02 2023 +0000, Giovanni Mascellani wrote:
Also, could you please add a test like this one:
[pixel shader] float3x4 m; float4 main() : sv_target { return determinant(m); } [test] uniform 0 float4 1.0 2.0 3.0 0.0 uniform 4 float4 5.0 -6.0 7.0 0.0 uniform 8 float4 9.0 10.0 11.0 0.0 uniform 12 float4 0.0 0.0 0.0 0.0 draw quad probe all rgba (192.0, 192.0, 192.0, 192.0)
I.e., I want to check what happens for a non-square matrix whose smallest dimension is not 1.
Pushed this one, thanks.
On Fri Sep 8 15:59:32 2023 +0000, Giovanni Mascellani wrote:
The trick I know takes advantage of function overloading:
float test(float x) { return 1.0; } float test(half x) { return 2.0; } float main() { return test(determinant(x)); }
Right, for half arguments it returns a half result, judging by the test() overload it picks. So that probably means I actually do need a template to produce a different return type. It's also annoying if internal variables can't be kept as floats.
On Sat Sep 9 13:52:28 2023 +0000, Nikolay Sivov wrote:
Right, for half arguments it returns a half result, judging by the test() overload it picks. So that probably means I actually do need a template to produce a different return type. It's also annoying if internal variables can't be kept as floats.
Yeah, it's quite unfortunate. Though notice that in https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/310#note_43922 a kind of solution was suggested for avoiding writing the type too many times. We might even introduce some helper doing `sprintf()`, compilation and memory handling all at once.
On Mon Sep 11 10:48:39 2023 +0000, Giovanni Mascellani wrote:
Yeah, it's quite unfortunate. Though notice that in https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/310#note_43922 a kind of solution was suggested for avoiding writing the type too many times. We might even introduce some helper doing `sprintf()`, compilation and memory handling all at once.
I'd rather we errored out on halves for now.
On Mon Sep 11 10:54:13 2023 +0000, Nikolay Sivov wrote:
I'd rather we errored out on halves for now.
Or maybe add a template for arguments/return type, and leave internal variables as floats at all times, if that works for casts.
On Mon Sep 11 10:55:28 2023 +0000, Nikolay Sivov wrote:
Or maybe add a template for arguments/return type, and leave internal variables as floats at all times, if that works for casts.
Not sure what the others prefer, but I'd push for the proper solution instead. It's not that hard, and with a little tooling it should be easily reusable in other future cases.
On Mon Sep 11 10:56:57 2023 +0000, Giovanni Mascellani wrote:
Not sure what the others prefer, but I'd push for the proper solution instead. It's not that hard, and with a little tooling it should be easily reusable in other future cases.
I don't think I care enough about this question. It seems easy, but halves are indeed rarely used. We'll need to fix lit() though; I'll send a patch for that one.
If we really care about not specifying the printf argument multiple times, we can use something like [1] and [2]. I do worry though, since this means we're not using ANSI stdio anymore, and maybe that'll break something subtle.
[1] https://gitlab.winehq.org/zfigura/vkd3d/-/commit/1277bbe82152cdffad393a905fd... [2] https://gitlab.winehq.org/zfigura/vkd3d/-/commit/93ca3d251b6bfa4beb59213a29a...
On Mon Sep 11 22:06:19 2023 +0000, Zebediah Figura wrote:
I don't think I care enough on this question. It seems easy, but halves are indeed rarely used. We'll need to fix lit() though; I'll send a patch for that one. If we really care about not specifying the printf argument multiple times, we can use something like [1] and [2]. I do worry though, since this means we're not using ANSI stdio anymore, and maybe that'll break something subtle. [1] https://gitlab.winehq.org/zfigura/vkd3d/-/commit/1277bbe82152cdffad393a905fd... [2] https://gitlab.winehq.org/zfigura/vkd3d/-/commit/93ca3d251b6bfa4beb59213a29a...
Making sure I get this right. The proper solution would be to generate float vs half functions? For positional arguments we'll have a single "%1$s" format with a single typename argument?
Making sure I get this right. The proper solution would be to generate float vs half functions? For positional arguments we'll have a single "%1$s" format with a single typename argument?
Yes, that's what I would try. Zeb has some concerns about breaking something with her patch that plugs in `_vsprintf_p()`, though. I'm willing to take the risk, but she definitely knows more about these kinds of issues than I do. We're probably waiting for 1.9 to be released anyway.