On my Nvidia GeForce GTX 1050 Ti, `ddxddy.shader_test` doesn't pass because of considerably different numeric results.
As Giovanni pointed out, this is because my GPU uses the fine derivative and not the coarse derivative to implement ddx() and ddy().
For this reason, the result for ddx|ddy() is quantized so that the test passes whether the GPU uses coarse or fine derivatives.
Additionally, tests for both ddx_coarse|ddy_coarse() and ddx_fine|ddy_fine() are added, which expect a more precise result.
-- v3: vkd3d-shader/hlsl: Support fine derivates. vkd3d-shader/hlsl: Support coarse derivates. tests: Quantize regular and coarse derivate test results. tests: Make ddx() and ddy() test behave correctly for shader models < 4. tests: Test coarse and fine derivates.
From: Francisco Casas fcasas@codeweavers.com
Co-authored-by: Giovanni Mascellani gmascellani@codeweavers.com --- tests/ddxddy.shader_test | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+)
diff --git a/tests/ddxddy.shader_test b/tests/ddxddy.shader_test index 6efb5ab6f..8becf524f 100644 --- a/tests/ddxddy.shader_test +++ b/tests/ddxddy.shader_test @@ -24,3 +24,43 @@ probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 8 probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 8 probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 8 probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40 + + +[require] +shader model >= 5.0 + + +[pixel shader todo] +float4 main(float4 pos : sv_position) : sv_target +{ + pos /= 10.0; + float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); + return float4(nonlinear, ddx_coarse(nonlinear), ddy_coarse(nonlinear), 0.0); +} + +[test] +todo draw quad +probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 16 +probe (11, 10) rgba (-0.689999819, -0.164999843, 0.104999900, 0.0) 16 +probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 16 +probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 16 +probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 24 +probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40 + + +[pixel shader todo] +float4 main(float4 pos : sv_position) : sv_target +{ + pos /= 10.0; + float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); + return float4(nonlinear, ddx_fine(nonlinear), ddy_fine(nonlinear), 0.0); +} + +[test] +todo draw quad +probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 16 +probe (11, 10) rgba (-0.689999819, -0.164999843, 0.114999890, 0.0) 32 +probe (10, 11) rgba (-0.420000076, -0.154999852, 0.104999900, 0.0) 32 +probe (11, 11) rgba (-0.574999928, -0.154999852, 0.114999890, 0.0) 32 +probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 24 +probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 128
From: Francisco Casas fcasas@codeweavers.com
--- tests/ddxddy.shader_test | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/tests/ddxddy.shader_test b/tests/ddxddy.shader_test index 8becf524f..0dfdadbd0 100644 --- a/tests/ddxddy.shader_test +++ b/tests/ddxddy.shader_test @@ -1,16 +1,26 @@ +[require] +shader model >= 3.0 + [pixel shader] float4 main(float4 pos : sv_position) : sv_target { return float4(ddx(pos.x), ddy(pos.y), 0, 0); }
+ [test] draw quad probe all rgba (1.0, 1.0, 0.0, 0.0)
+ [pixel shader] float4 main(float4 pos : sv_position) : sv_target { + // Shader models < 4 don't add 0.5 to sv_position, so this adjustment is required to get the + // same outputs. + pos.x = floor(pos.x) + 0.5; + pos.y = floor(pos.y) + 0.5; + pos /= 10.0; float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); return float4(nonlinear, ddx(nonlinear), ddy(nonlinear), 0.0);
From: Francisco Casas fcasas@codeweavers.com
Co-authored-by: Giovanni Mascellani gmascellani@codeweavers.com --- tests/ddxddy.shader_test | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-)
diff --git a/tests/ddxddy.shader_test b/tests/ddxddy.shader_test index 0dfdadbd0..ac8e4f3a4 100644 --- a/tests/ddxddy.shader_test +++ b/tests/ddxddy.shader_test @@ -23,17 +23,21 @@ float4 main(float4 pos : sv_position) : sv_target
pos /= 10.0; float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); - return float4(nonlinear, ddx(nonlinear), ddy(nonlinear), 0.0); + float4 res = float4(nonlinear, ddx(nonlinear), ddy(nonlinear), 0.0); + + // Each device may use either the coarse or the fine derivate, so use quantization. + return round(30 * res); }
[test] draw quad -probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 8 -probe (11, 10) rgba (-0.689999819, -0.164999843, 0.104999900, 0.0) 8 -probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 8 -probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 8 -probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 8 -probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40 +probe (10, 10) rgba (-16.0, -5.0, 3.0, 0.0) +probe (11, 10) rgba (-21.0, -5.0, 3.0, 0.0) +probe (10, 11) rgba (-13.0, -5.0, 3.0, 0.0) +probe (11, 11) rgba (-17.0, -5.0, 3.0, 0.0) +probe (12, 10) rgba (-26.0, -6.0, 4.0, 0.0) +probe (16, 16) rgba (-25.0, -7.0, 5.0, 0.0) +probe (150, 150) rgba (-226.0, -47.0, 45.0, 0.0)
[require] @@ -45,17 +49,21 @@ float4 main(float4 pos : sv_position) : sv_target { pos /= 10.0; float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); - return float4(nonlinear, ddx_coarse(nonlinear), ddy_coarse(nonlinear), 0.0); + float4 res = float4(nonlinear, ddx_coarse(nonlinear), ddy_coarse(nonlinear), 0.0); + + // Each device may use either the coarse or the fine derivate, so use quantization. + return round(30 * res); }
[test] todo draw quad -probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 16 -probe (11, 10) rgba (-0.689999819, -0.164999843, 0.104999900, 0.0) 16 -probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 16 -probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 16 -probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 24 -probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40 +probe (10, 10) rgba (-16.0, -5.0, 3.0, 0.0) +probe (11, 10) rgba (-21.0, -5.0, 3.0, 0.0) +probe (10, 11) rgba (-13.0, -5.0, 3.0, 0.0) +probe (11, 11) rgba (-17.0, -5.0, 3.0, 0.0) +probe (12, 10) rgba (-26.0, -6.0, 4.0, 0.0) +probe (16, 16) rgba (-25.0, -7.0, 5.0, 0.0) +probe (150, 150) rgba (-226.0, -47.0, 45.0, 0.0)
[pixel shader todo]
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 2 ++ libs/vkd3d-shader/hlsl.h | 2 ++ libs/vkd3d-shader/hlsl.y | 24 ++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 10 ++++++++++ tests/ddxddy.shader_test | 4 ++-- 5 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 152ec6275..8b9b41125 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2340,7 +2340,9 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_COS] = "cos", [HLSL_OP1_COS_REDUCED] = "cos_reduced", [HLSL_OP1_DSX] = "dsx", + [HLSL_OP1_DSX_COARSE] = "dsx_coarse", [HLSL_OP1_DSY] = "dsy", + [HLSL_OP1_DSY_COARSE] = "dsy_coarse", [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 7d02448e0..fe002dbc5 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -502,7 +502,9 @@ enum hlsl_ir_expr_op HLSL_OP1_COS, HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_DSX, + HLSL_OP1_DSX_COARSE, HLSL_OP1_DSY, + HLSL_OP1_DSY_COARSE, HLSL_OP1_EXP2, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index cf483d82c..74f63e4e2 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2612,6 +2612,17 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); }
+static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); +} + static bool intrinsic_ddy(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2623,6 +2634,17 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); }
+static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3485,7 +3507,9 @@ intrinsic_functions[] = {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, + {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, {"ddy", 1, true, intrinsic_ddy}, + {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 60948d649..7ffae550b 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4364,11 +4364,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); break;
+ case HLSL_OP1_DSX_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); + break; + case HLSL_OP1_DSY: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); break;
+ case HLSL_OP1_DSY_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); + break; + case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); diff --git a/tests/ddxddy.shader_test b/tests/ddxddy.shader_test index ac8e4f3a4..2526ed156 100644 --- a/tests/ddxddy.shader_test +++ b/tests/ddxddy.shader_test @@ -44,7 +44,7 @@ probe (150, 150) rgba (-226.0, -47.0, 45.0, 0.0) shader model >= 5.0
-[pixel shader todo] +[pixel shader] float4 main(float4 pos : sv_position) : sv_target { pos /= 10.0; @@ -56,7 +56,7 @@ float4 main(float4 pos : sv_position) : sv_target }
[test] -todo draw quad +draw quad probe (10, 10) rgba (-16.0, -5.0, 3.0, 0.0) probe (11, 10) rgba (-21.0, -5.0, 3.0, 0.0) probe (10, 11) rgba (-13.0, -5.0, 3.0, 0.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 2 ++ libs/vkd3d-shader/hlsl.h | 2 ++ libs/vkd3d-shader/hlsl.y | 24 ++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 10 ++++++++++ tests/ddxddy.shader_test | 4 ++-- 5 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 8b9b41125..da38435f7 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2341,8 +2341,10 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_COS_REDUCED] = "cos_reduced", [HLSL_OP1_DSX] = "dsx", [HLSL_OP1_DSX_COARSE] = "dsx_coarse", + [HLSL_OP1_DSX_FINE] = "dsx_fine", [HLSL_OP1_DSY] = "dsy", [HLSL_OP1_DSY_COARSE] = "dsy_coarse", + [HLSL_OP1_DSY_FINE] = "dsy_fine", [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index fe002dbc5..f7f764128 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -503,8 +503,10 @@ enum hlsl_ir_expr_op HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_DSX, HLSL_OP1_DSX_COARSE, + HLSL_OP1_DSX_FINE, HLSL_OP1_DSY, HLSL_OP1_DSY_COARSE, + HLSL_OP1_DSY_FINE, HLSL_OP1_EXP2, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 74f63e4e2..6d1aa8a1d 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2623,6 +2623,17 @@ static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); }
+static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); +} + static bool intrinsic_ddy(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2645,6 +2656,17 @@ static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); }
+static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3508,8 +3530,10 @@ intrinsic_functions[] = {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, + {"ddx_fine", 1, true, intrinsic_ddx_fine}, {"ddy", 1, true, intrinsic_ddy}, {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, + {"ddy_fine", 1, true, intrinsic_ddy_fine}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 7ffae550b..11edaf860 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4369,6 +4369,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); break;
+ case HLSL_OP1_DSX_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); + break; + case HLSL_OP1_DSY: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); @@ -4379,6 +4384,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); break;
+ case HLSL_OP1_DSY_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); + break; + case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); diff --git a/tests/ddxddy.shader_test b/tests/ddxddy.shader_test index 2526ed156..586447995 100644 --- a/tests/ddxddy.shader_test +++ b/tests/ddxddy.shader_test @@ -66,7 +66,7 @@ probe (16, 16) rgba (-25.0, -7.0, 5.0, 0.0) probe (150, 150) rgba (-226.0, -47.0, 45.0, 0.0)
-[pixel shader todo] +[pixel shader] float4 main(float4 pos : sv_position) : sv_target { pos /= 10.0; @@ -75,7 +75,7 @@ float4 main(float4 pos : sv_position) : sv_target }
[test] -todo draw quad +draw quad probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 16 probe (11, 10) rgba (-0.689999819, -0.164999843, 0.114999890, 0.0) 32 probe (10, 11) rgba (-0.420000076, -0.154999852, 0.104999900, 0.0) 32
On Tue Jun 20 20:06:01 2023 +0000, Giovanni Mascellani wrote:
Great! Only one last favor: I'd like llvmpipe to pass tests too. This needs two simple changes:
- Despite its name, the coarse derivative can be fine, and in the case
of llvmpipe it seems to be. So please quantize it like you do for the unqualified derivative.
- llvmpipe is apparently even less precise than NVIDIA. So please raise
the ULP limits like in https://gitlab.winehq.org/giomasce/vkd3d/-/commit/e78c53aab6be5cd647de25f47a.... That commit passes tests in all the implementations I have access to (radv, NVIDIA and Intel). If it is compiled in your mesa binaries, you can test on llvmpipe using `VKD3D_VULKAN_DEVICE=1` or something. Also, we normally use the imperative form in commit subjects: "Support fine derivatives" rather than "Fine derivatives support". Thanks!
Thanks, I applied these changes.
This merge request was approved by Giovanni Mascellani.