Module: vkd3d Branch: master Commit: 902ddee557f3b2c0e5bb668c7330b79b7954ebce URL: https://gitlab.winehq.org/wine/vkd3d/-/commit/902ddee557f3b2c0e5bb668c7330b79b7954ebce
Author: Francisco Casas fcasas@codeweavers.com Date: Fri Feb 3 12:17:26 2023 -0300
vkd3d-shader/hlsl: Fix SM1 dp2add swizzles.
SM1 dp2add doesn't map src swizzles to the dst writemask; it also expects the last argument to have a replicate swizzle.
Before this patch we were writing the operation as:
```
dp2add r0.x, r1.x, r0.x, r2.x
```
and now it is:
```
dp2add r0.x, r1.xyxx, r0.xyxx, r2.x
```
dp2add now has its own function, write_sm1_dp2add(), since it seems to be the only instruction with this structure.
Ideally we would be using the default swizzles for the first two src arguments:
```
dp2add r0.x, r1, r0, r2.x
```
since, according to native's documentation, these are supported for all shader models below 4 (sm < 4).
But this change -- along with following the convention of repeating the last component of the swizzle when fewer than 4 components are to be specified -- would require more global changes, probably in hlsl_swizzle_from_writemask() and hlsl_map_swizzle().
---
libs/vkd3d-shader/hlsl_sm1.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index ebeaadb1..34bead54 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -462,13 +462,13 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); }
-static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) +static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, + const struct hlsl_reg *src3) { struct sm1_instruction instr = { - .opcode = opcode, + .opcode = D3DSIO_DP2ADD,
.dst.type = D3DSPR_TEMP, .dst.writemask = dst->writemask, @@ -487,9 +487,6 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf .src_count = 3, };
- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); write_sm1_instruction(ctx, buffer, &instr); }
@@ -742,7 +739,7 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b break;
case HLSL_OP3_DP2ADD: - write_sm1_ternary_op(ctx, buffer, D3DSIO_DP2ADD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break;
default: