On my Nvidia GeForce GTX 1050 Ti, `ddxddy.shader_test` doesn't pass because of considerably different numeric results.
As Giovanni pointed out, this is because my GPU uses the fine derivative and not the coarse derivative to implement ddx() and ddy().
For this reason, the results of ddx() and ddy() are quantized so that the test passes whether the GPU uses coarse or fine derivatives.
Additionally, tests for both ddx_coarse()/ddy_coarse() and ddx_fine()/ddy_fine() are added, which expect a more precise result.
-- v4: vkd3d-shader/hlsl: Support fine derivates. vkd3d-shader/hlsl: Support coarse derivates. tests: Quantize regular and coarse derivate test results. tests: Make ddx() and ddy() test behave correctly for shader models < 4. tests: Test coarse and fine derivates.
From: Francisco Casas fcasas@codeweavers.com
Co-authored-by: Giovanni Mascellani gmascellani@codeweavers.com --- tests/hlsl/ddxddy.shader_test | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+)
diff --git a/tests/hlsl/ddxddy.shader_test b/tests/hlsl/ddxddy.shader_test index 6efb5ab6f..8becf524f 100644 --- a/tests/hlsl/ddxddy.shader_test +++ b/tests/hlsl/ddxddy.shader_test @@ -24,3 +24,43 @@ probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 8 probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 8 probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 8 probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40 + + +[require] +shader model >= 5.0 + + +[pixel shader todo] +float4 main(float4 pos : sv_position) : sv_target +{ + pos /= 10.0; + float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); + return float4(nonlinear, ddx_coarse(nonlinear), ddy_coarse(nonlinear), 0.0); +} + +[test] +todo draw quad +probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 16 +probe (11, 10) rgba (-0.689999819, -0.164999843, 0.104999900, 0.0) 16 +probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 16 +probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 16 +probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 24 +probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40 + + +[pixel shader todo] +float4 main(float4 pos : sv_position) : sv_target +{ + pos /= 10.0; + float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); + return float4(nonlinear, ddx_fine(nonlinear), ddy_fine(nonlinear), 0.0); +} + +[test] +todo draw quad +probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 16 +probe (11, 10) rgba (-0.689999819, -0.164999843, 0.114999890, 0.0) 32 +probe (10, 11) rgba (-0.420000076, -0.154999852, 0.104999900, 0.0) 32 +probe (11, 11) rgba (-0.574999928, -0.154999852, 0.114999890, 0.0) 32 +probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 24 +probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 128
From: Francisco Casas fcasas@codeweavers.com
--- tests/hlsl/ddxddy.shader_test | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/tests/hlsl/ddxddy.shader_test b/tests/hlsl/ddxddy.shader_test index 8becf524f..da1414b19 100644 --- a/tests/hlsl/ddxddy.shader_test +++ b/tests/hlsl/ddxddy.shader_test @@ -1,3 +1,6 @@ +[require] +shader model >= 3.0 + [pixel shader] float4 main(float4 pos : sv_position) : sv_target { @@ -8,9 +11,15 @@ float4 main(float4 pos : sv_position) : sv_target draw quad probe all rgba (1.0, 1.0, 0.0, 0.0)
+ [pixel shader] float4 main(float4 pos : sv_position) : sv_target { + // Shader models < 4 don't add 0.5 to sv_position, so this adjustment is required to get the + // same outputs. + pos.x = floor(pos.x) + 0.5; + pos.y = floor(pos.y) + 0.5; + pos /= 10.0; float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); return float4(nonlinear, ddx(nonlinear), ddy(nonlinear), 0.0);
From: Francisco Casas fcasas@codeweavers.com
Co-authored-by: Giovanni Mascellani gmascellani@codeweavers.com --- tests/hlsl/ddxddy.shader_test | 36 +++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-)
diff --git a/tests/hlsl/ddxddy.shader_test b/tests/hlsl/ddxddy.shader_test index da1414b19..674e8d06b 100644 --- a/tests/hlsl/ddxddy.shader_test +++ b/tests/hlsl/ddxddy.shader_test @@ -22,17 +22,21 @@ float4 main(float4 pos : sv_position) : sv_target
pos /= 10.0; float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); - return float4(nonlinear, ddx(nonlinear), ddy(nonlinear), 0.0); + float4 res = float4(nonlinear, ddx(nonlinear), ddy(nonlinear), 0.0); + + // Each device may use either the coarse or the fine derivate, so use quantization. + return round(30 * res); }
[test] draw quad -probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 8 -probe (11, 10) rgba (-0.689999819, -0.164999843, 0.104999900, 0.0) 8 -probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 8 -probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 8 -probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 8 -probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40 +probe (10, 10) rgba (-16.0, -5.0, 3.0, 0.0) +probe (11, 10) rgba (-21.0, -5.0, 3.0, 0.0) +probe (10, 11) rgba (-13.0, -5.0, 3.0, 0.0) +probe (11, 11) rgba (-17.0, -5.0, 3.0, 0.0) +probe (12, 10) rgba (-26.0, -6.0, 4.0, 0.0) +probe (16, 16) rgba (-25.0, -7.0, 5.0, 0.0) +probe (150, 150) rgba (-226.0, -47.0, 45.0, 0.0)
[require] @@ -44,17 +48,21 @@ float4 main(float4 pos : sv_position) : sv_target { pos /= 10.0; float nonlinear = pos.x * pos.y - pos.x * (pos.x + 0.5); - return float4(nonlinear, ddx_coarse(nonlinear), ddy_coarse(nonlinear), 0.0); + float4 res = float4(nonlinear, ddx_coarse(nonlinear), ddy_coarse(nonlinear), 0.0); + + // Each device may use either the coarse or the fine derivate, so use quantization. + return round(30 * res); }
[test] todo draw quad -probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 16 -probe (11, 10) rgba (-0.689999819, -0.164999843, 0.104999900, 0.0) 16 -probe (10, 11) rgba (-0.420000076, -0.164999843, 0.104999900, 0.0) 16 -probe (11, 11) rgba (-0.574999928, -0.164999843, 0.104999900, 0.0) 16 -probe (12, 10) rgba (-0.874999881, -0.205000162, 0.124999881, 0.0) 24 -probe (150, 150) rgba (-7.52500916, -1.56500244, 1.50500488, 0.0) 40 +probe (10, 10) rgba (-16.0, -5.0, 3.0, 0.0) +probe (11, 10) rgba (-21.0, -5.0, 3.0, 0.0) +probe (10, 11) rgba (-13.0, -5.0, 3.0, 0.0) +probe (11, 11) rgba (-17.0, -5.0, 3.0, 0.0) +probe (12, 10) rgba (-26.0, -6.0, 4.0, 0.0) +probe (16, 16) rgba (-25.0, -7.0, 5.0, 0.0) +probe (150, 150) rgba (-226.0, -47.0, 45.0, 0.0)
[pixel shader todo]
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 2 ++ libs/vkd3d-shader/hlsl.h | 2 ++ libs/vkd3d-shader/hlsl.y | 24 ++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 10 ++++++++++ tests/hlsl/ddxddy.shader_test | 4 ++-- 5 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 3a1e17797..e959435ac 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2339,7 +2339,9 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_COS] = "cos", [HLSL_OP1_COS_REDUCED] = "cos_reduced", [HLSL_OP1_DSX] = "dsx", + [HLSL_OP1_DSX_COARSE] = "dsx_coarse", [HLSL_OP1_DSY] = "dsy", + [HLSL_OP1_DSY_COARSE] = "dsy_coarse", [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 28c7c6914..3caf15abc 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -502,7 +502,9 @@ enum hlsl_ir_expr_op HLSL_OP1_COS, HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_DSX, + HLSL_OP1_DSX_COARSE, HLSL_OP1_DSY, + HLSL_OP1_DSY_COARSE, HLSL_OP1_EXP2, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 60d6514c9..bf0522cc1 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2670,6 +2670,17 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); }
+static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); +} + static bool intrinsic_ddy(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2681,6 +2692,17 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); }
+static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3541,7 +3563,9 @@ intrinsic_functions[] = {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, + {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, {"ddy", 1, true, intrinsic_ddy}, + {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 677243e15..665fd1aa5 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4370,11 +4370,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); break;
+ case HLSL_OP1_DSX_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); + break; + case HLSL_OP1_DSY: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); break;
+ case HLSL_OP1_DSY_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); + break; + case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); diff --git a/tests/hlsl/ddxddy.shader_test b/tests/hlsl/ddxddy.shader_test index 674e8d06b..53a4f6b7c 100644 --- a/tests/hlsl/ddxddy.shader_test +++ b/tests/hlsl/ddxddy.shader_test @@ -43,7 +43,7 @@ probe (150, 150) rgba (-226.0, -47.0, 45.0, 0.0) shader model >= 5.0
-[pixel shader todo] +[pixel shader] float4 main(float4 pos : sv_position) : sv_target { pos /= 10.0; @@ -55,7 +55,7 @@ float4 main(float4 pos : sv_position) : sv_target }
[test] -todo draw quad +draw quad probe (10, 10) rgba (-16.0, -5.0, 3.0, 0.0) probe (11, 10) rgba (-21.0, -5.0, 3.0, 0.0) probe (10, 11) rgba (-13.0, -5.0, 3.0, 0.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 2 ++ libs/vkd3d-shader/hlsl.h | 2 ++ libs/vkd3d-shader/hlsl.y | 24 ++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 10 ++++++++++ tests/hlsl/ddxddy.shader_test | 4 ++-- 5 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index e959435ac..4e9af15c1 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2340,8 +2340,10 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_COS_REDUCED] = "cos_reduced", [HLSL_OP1_DSX] = "dsx", [HLSL_OP1_DSX_COARSE] = "dsx_coarse", + [HLSL_OP1_DSX_FINE] = "dsx_fine", [HLSL_OP1_DSY] = "dsy", [HLSL_OP1_DSY_COARSE] = "dsy_coarse", + [HLSL_OP1_DSY_FINE] = "dsy_fine", [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 3caf15abc..f102657d9 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -503,8 +503,10 @@ enum hlsl_ir_expr_op HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_DSX, HLSL_OP1_DSX_COARSE, + HLSL_OP1_DSX_FINE, HLSL_OP1_DSY, HLSL_OP1_DSY_COARSE, + HLSL_OP1_DSY_FINE, HLSL_OP1_EXP2, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index bf0522cc1..a8157d3ae 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2681,6 +2681,17 @@ static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); }
+static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); +} + static bool intrinsic_ddy(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2703,6 +2714,17 @@ static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); }
+static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3564,8 +3586,10 @@ intrinsic_functions[] = {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, + {"ddx_fine", 1, true, intrinsic_ddx_fine}, {"ddy", 1, true, intrinsic_ddy}, {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, + {"ddy_fine", 1, true, intrinsic_ddy_fine}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 665fd1aa5..a465e0b70 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4375,6 +4375,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); break;
+ case HLSL_OP1_DSX_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); + break; + case HLSL_OP1_DSY: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); @@ -4385,6 +4390,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); break;
+ case HLSL_OP1_DSY_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); + break; + case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); diff --git a/tests/hlsl/ddxddy.shader_test b/tests/hlsl/ddxddy.shader_test index 53a4f6b7c..4986c233f 100644 --- a/tests/hlsl/ddxddy.shader_test +++ b/tests/hlsl/ddxddy.shader_test @@ -65,7 +65,7 @@ probe (16, 16) rgba (-25.0, -7.0, 5.0, 0.0) probe (150, 150) rgba (-226.0, -47.0, 45.0, 0.0)
-[pixel shader todo] +[pixel shader] float4 main(float4 pos : sv_position) : sv_target { pos /= 10.0; @@ -74,7 +74,7 @@ float4 main(float4 pos : sv_position) : sv_target }
[test] -todo draw quad +draw quad probe (10, 10) rgba (-0.524999976, -0.164999843, 0.104999900, 0.0) 16 probe (11, 10) rgba (-0.689999819, -0.164999843, 0.114999890, 0.0) 32 probe (10, 11) rgba (-0.420000076, -0.154999852, 0.104999900, 0.0) 32
:arrow_up: Rebased.
This merge request was approved by Zebediah Figura.
This merge request was approved by Henri Verbeet.