-- v4: vkd3d-shader/hlsl: Support dot() for SM1.
From: Nikolay Sivov <nsivov@codeweavers.com>
--- libs/vkd3d-shader/hlsl.c | 1 + libs/vkd3d-shader/hlsl.h | 1 + libs/vkd3d-shader/hlsl_codegen.c | 57 +++++++++++++++++++++++++ libs/vkd3d-shader/hlsl_sm1.c | 73 +++++++++++++++++++++++++++++++- 4 files changed, 131 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 256e466a..6b8e1b10 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1768,6 +1768,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP2_NEQUAL] = "!=", [HLSL_OP2_RSHIFT] = ">>",
+ [HLSL_OP3_DP2ADD] = "dp2add", [HLSL_OP3_LERP] = "lerp", };
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index bb63f827..2d0beb6c 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -497,6 +497,7 @@ enum hlsl_ir_expr_op HLSL_OP2_NEQUAL, HLSL_OP2_RSHIFT,
+ HLSL_OP3_DP2ADD, HLSL_OP3_LERP, };
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 4fa860a6..aa950e35 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1535,6 +1535,62 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c return true; }
+/* Lower a 2-component DOT: to DP2ADD with a zero addend in pixel shaders, to MUL + ADD of the x and y products otherwise. */ +static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *arg1, *arg2, *mul, *replacement; + struct hlsl_ir_swizzle *add_x, *add_y; + struct hlsl_ir_constant *zero; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + if (expr->op != HLSL_OP2_DOT) + return false; + if (arg1->data_type->dimx != 2) + return false; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, &expr->node.loc))) + return false; + list_add_before(&instr->entry, &zero->node.entry); + + operands[0] = arg1; + operands[1] = arg2; + operands[2] = &zero->node; + + if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc))) + return false; + } + else + { + if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, expr->operands[0].node, expr->operands[1].node))) + return false; + list_add_before(&instr->entry, &mul->entry); + + if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) + return false; + list_add_before(&instr->entry, &add_x->node.entry); + + if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) + return false; + list_add_before(&instr->entry, &add_y->node.entry); + + if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, &add_x->node, &add_y->node))) + return false; + } + list_add_before(&instr->entry, &replacement->entry); + + hlsl_replace_node(instr, replacement); + return true; +} + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_type *type = instr->data_type, *arg_type; @@ -2949,6 +3005,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct 
hlsl_ir_function_decl *entry { transform_ir(ctx, lower_division, body, NULL); transform_ir(ctx, lower_sqrt, body, NULL); + transform_ir(ctx, lower_dot, body, NULL); }
transform_ir(ctx, validate_static_object_references, body, NULL); diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index facf81b8..57c9021a 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -422,7 +422,7 @@ struct sm1_instruction D3DSHADER_PARAM_SRCMOD_TYPE mod; unsigned int swizzle; uint32_t reg; - } srcs[2]; + } srcs[HLSL_MAX_OPERANDS]; unsigned int src_count;
unsigned int has_dst; @@ -459,6 +459,33 @@ static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu write_sm1_src_register(buffer, &instr->srcs[i], instr->dst.writemask); };
+static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) +{ + const struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .srcs[2].type = D3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, + }; + write_sm1_instruction(ctx, buffer, &instr); +} + static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) @@ -631,6 +658,9 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); struct hlsl_ir_node *arg1 = expr->operands[0].node; struct hlsl_ir_node *arg2 = expr->operands[1].node; + struct hlsl_ir_node *arg3 = expr->operands[2].node; + const struct hlsl_type *dst_type = expr->node.data_type; + struct vkd3d_string_buffer *dst_type_string;
assert(instr->reg.allocated);
@@ -641,6 +671,9 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b return; }
+ if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) + return; + switch (expr->op) { case HLSL_OP1_EXP2: @@ -679,10 +712,48 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE); break;
+ case HLSL_OP2_DOT: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + switch (arg1->data_type->dimx) + { + case 4: + write_sm1_binary_op(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case 3: + write_sm1_binary_op(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: + vkd3d_unreachable(); + } + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM1 %s dot expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP3_DP2ADD: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm1_ternary_op(ctx, buffer, D3DSIO_DP2ADD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM1 %s dp2add expression.", dst_type_string->buffer); + } + break; + + default: hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); break; } + + hlsl_release_string_buffer(ctx, dst_type_string); }
static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
On Wed Feb 1 15:46:51 2023 +0000, Nikolay Sivov wrote:
changed this line in [version 4 of the diff](/wine/vkd3d/-/merge_requests/74/diffs?diff_id=30372&start_sha=45e322c581c5284eb3cd39e077b09bd4a2c52148#e23171cc4cf1a77702423c2fd4a3eed6e32c7443_667_667)
Thanks, pushed that one.