SPIR-V already handled DSX/DSY, so only D3DBC/TPF needed new case blocks.
You'll notice that there's no test for this one - in addition to being a pretty straightforward translation for all possible formats, this feature uses the render target width/height and I wasn't sure if there was a good way to ensure that the test would always make sense.
Instead, I did the test manually, and it's what you'd expect (EDIT: Previously the test used a uniform which always optimized to 0, new test uses VPOS instead):
HLSL: ``` float4 main(float4 pos : sv_position) : sv_target { float4 x = ddx(pos.x); float4 y = ddy(pos.y); return x + y; } ```
D3DBC: ``` ps_3_0 dcl_position0 vPos mov r0.xyzw, vPos.xyzw mov r1.x, r0.x dsx r1.x, r1.x mov r0.x, r0.yxxx dsy r0.x, r0.x mov r1.xyzw, r1.x mov r0.xyzw, r0.x add r0.xyzw, r1.xyzw, r0.xyzw mov oC0.xyzw, r0.xyzw ```
DXBC-TPF: ``` ps_4_0 dcl_input_ps_siv linear v0.xyzw, position dcl_output o0.xyzw dcl_temps 2 mov r0.xyzw, v0.xyzw mov r1.x, r0.x dsx r1.x, r1.x mov r0.x, r0.yxxx dsy r0.x, r0.x mov r1.xyzw, r1.x mov r0.xyzw, r0.x add r0.xyzw, r1.xyzw, r0.xyzw mov o0.xyzw, r0.xyzw ret ```
Fixes https://bugs.winehq.org/show_bug.cgi?id=54827
-- v6: tests: Add tests for ddx(), ddy() intrinsics.
From: Ethan Lee flibitijibibo@gmail.com
SPIR-V already handled DSX/DSY, so only D3DBC/TPF needed new case blocks.
Signed-off-by: Ethan Lee flibitijibibo@gmail.com --- libs/vkd3d-shader/d3dbc.c | 8 ++++++++ libs/vkd3d-shader/hlsl.y | 24 ++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 10 ++++++++++ 3 files changed, 42 insertions(+)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 14268440..aa45dc2d 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1696,6 +1696,14 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); break;
+ case HLSL_OP1_DSX: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSY: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + break; + case HLSL_OP1_EXP2: write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); break; diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 10deda75..6c3bca34 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2648,6 +2648,28 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, mul2, mul1_neg, loc); }
+static bool intrinsic_ddx(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); +} + +static bool intrinsic_ddy(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3380,6 +3402,8 @@ intrinsic_functions[] = {"clamp", 3, true, intrinsic_clamp}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, + {"ddy", 1, true, intrinsic_ddy}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index d104c078..0908fba3 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3948,6 +3948,16 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); break;
+ case HLSL_OP1_DSX: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
From: Ethan Lee flibitijibibo@gmail.com
Thanks to Giovanni for the second set of tests! Note that the tolerance for the final pixel was set much higher than the others; this test seems to be an issue for some devices (in my case, a 7900 XTX running RADV).
Co-authored-by: Giovanni Mascellani gmascellani@codeweavers.com Signed-off-by: Ethan Lee flibitijibibo@gmail.com --- Makefile.am | 1 + tests/ddxddy.shader_test | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 tests/ddxddy.shader_test
diff --git a/Makefile.am b/Makefile.am index 3f1ec171..8e94658d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -63,6 +63,7 @@ vkd3d_shader_tests = \ tests/cbuffer.shader_test \ tests/compute.shader_test \ tests/conditional.shader_test \ + tests/ddxddy.shader_test \ tests/distance.shader_test \ tests/entry-point-semantics.shader_test \ tests/exp.shader_test \ diff --git a/tests/ddxddy.shader_test b/tests/ddxddy.shader_test new file mode 100644 index 00000000..6efb5ab6 --- /dev/null +++ b/tests/ddxddy.shader_test @@ -0,0 +1,26 @@ +[pixel shader] +float4 main(float4 pos : sv_position) : sv_target +{ + return float4(ddx(pos.x), ddy(pos.y), 0, 0); +} + +[test] +draw quad +probe all rgba (1.0, 1.0, 0.0, 0.0) + +[pixel shader] +float4 main(float4 pos : sv_position) : sv_target +{ + pos /= 10.0; + float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); + return float4(nonlinear, ddx(nonlinear), ddy(nonlinear), 0.0); +} + +[test] +draw quad +probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 8 +probe (11, 10) rgba (-0.689999819, -0.164999843, 0.104999900, 0.0) 8 +probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 8 +probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 8 +probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 8 +probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40
On Fri Apr 28 14:17:12 2023 +0000, Giovanni Mascellani wrote:
Strange, not sure why it goes that astray, but I wouldn't sweat too much over that. Pushing 40 is fine.
Understood, test has been pushed!