SPIR-V already handled DSX/DSY, so only D3DBC/TPF needed new case blocks.
You'll notice that there's no test for this one - in addition to being a pretty straightforward translation for all possible formats, this feature uses the render target width/height and I wasn't sure if there was a good way to ensure that the test would always make sense.
Instead, I did the test manually, and it's what you'd expect:
HLSL:
```
uniform float f;

float4 main() : sv_target
{
    float4 x = ddx(f);
    float4 y = ddy(f);
    return x + y;
}
```
D3DBC:
```
ps_3_0
mov r0.x, c0.x
dsx r1.x, r0.x
dsy r0.x, r0.x
mov r1.xyzw, r1.x
mov r0.xyzw, r0.x
add r0.xyzw, r1.xyzw, r0.xyzw
mov oC0.xyzw, r0.xyzw
```
DXBC-TPF:
```
ps_4_0
dcl_constantBuffer cb0[1], immediateIndexed
dcl_output o0.xyzw
dcl_temps 2
mov r0.x, cb0[0].x
dsx r0.y, r0.x
dsy r0.x, r0.x
mov r1.xyzw, r0.y
mov r0.xyzw, r0.x
add r0.xyzw, r1.xyzw, r0.xyzw
mov o0.xyzw, r0.xyzw
ret
```
From: Ethan Lee <flibitijibibo@gmail.com>
SPIR-V already handled DSX/DSY, so only D3DBC/TPF needed new case blocks.
Signed-off-by: Ethan Lee <flibitijibibo@gmail.com>
---
 libs/vkd3d-shader/d3dbc.c |  8 ++++++++
 libs/vkd3d-shader/hlsl.y  | 24 ++++++++++++++++++++++++
 libs/vkd3d-shader/tpf.c   | 10 ++++++++++
 3 files changed, 42 insertions(+)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 14268440..aa45dc2d 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1696,6 +1696,14 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); break;
+ case HLSL_OP1_DSX: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSY: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + break; + case HLSL_OP1_EXP2: write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); break; diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 194d21f4..219c0a94 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2648,6 +2648,28 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, mul2, mul1_neg, loc); }
+static bool intrinsic_ddx(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); +} + +static bool intrinsic_ddy(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3380,6 +3402,8 @@ intrinsic_functions[] = {"clamp", 3, true, intrinsic_clamp}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, + {"ddy", 1, true, intrinsic_ddy}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index e76cf8c9..d84216bf 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3926,6 +3926,16 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); break;
+ case HLSL_OP1_DSX: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
I don't know if that's an optimization effect, but the code in your example produces zero output unconditionally:
```
ps_4_0
dcl_output o0.xyzw
mov o0.xyzw, l(0,0,0,0)
ret
```
I don't know how this works exactly, but maybe constant arguments produce zero derivatives, and using an expression that depends on certain semantics generates actual instructions.
On Sat Apr 22 15:46:22 2023 +0000, Nikolay Sivov wrote:
I don't know if that's an optimization effect, but the code in your example produces zero output unconditionally:
```
ps_4_0
dcl_output o0.xyzw
mov o0.xyzw, l(0,0,0,0)
ret
```
I don't know how this works exactly, but maybe constant arguments produce zero derivatives, and using an expression that depends on certain semantics generates actual instructions.
I think you're right; I changed the test to use VPOS and got a better result:
```
uniform float f;

float4 main(float4 pos : SV_POSITION) : sv_target
{
    float x = ddx(pos.x);
    float y = ddy(pos.y);
    return float4(x, y, 1, 1);
}
```
```
ps_4_0
dcl_input_ps_siv linear noperspective v0.xy, position
dcl_output o0.xyzw
deriv_rtx o0.x, v0.x
deriv_rty o0.y, v0.y
mov o0.zw, l(0,0,1.000000,1.000000)
ret
```
This merge request was approved by Zebediah Figura.