Signed-off-by: Nikolay Sivov nsivov@codeweavers.com
-- v2: vkd3d-shader: Add constant folding for 'dp2add' operation. vkd3d-shader: Add constant folding for the 'dot' operation.
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_constant_ops.c | 24 ++++++++++++++++++++++++ tests/hlsl-dot.shader_test | 13 +++++++++++++ 2 files changed, 37 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d-shader/hlsl_constant_ops.c index 301113c84..5aa2431dd 100644 --- a/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d-shader/hlsl_constant_ops.c @@ -294,6 +294,26 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c return true; }
+static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + assert(type == HLSL_TYPE_FLOAT || type == HLSL_TYPE_HALF); + + dst->u[0].f = 0.0f; + for (k = 0; k < src1->node.data_type->dimx; ++k) + { + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + } + + return true; +} + static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct vkd3d_shader_location *loc) @@ -599,6 +619,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); break;
+ case HLSL_OP2_DOT: + success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); + break; + case HLSL_OP2_DIV: success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; diff --git a/tests/hlsl-dot.shader_test b/tests/hlsl-dot.shader_test index e51d7cb8c..15f120f70 100644 --- a/tests/hlsl-dot.shader_test +++ b/tests/hlsl-dot.shader_test @@ -74,6 +74,19 @@ uniform 4 float4 3.0 0.0 0.0 0.0 draw quad probe all rgba (6.0, 6.0, 6.0, 6.0)
+[pixel shader] +static const float4 x = float4(2.0, 3.0, 4.0, 5.0); +static const float4 y = float4(6.0, 7.0, 8.0, 9.0); + +float4 main() : sv_target +{ + return dot(x, y); +} + +[test] +draw quad +probe all rgba (110.0, 110.0, 110.0, 110.0) + [pixel shader fail] uniform float1x1 x; uniform float4 y;
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_constant_ops.c | 29 ++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d-shader/hlsl_constant_ops.c index 5aa2431dd..360202fd5 100644 --- a/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d-shader/hlsl_constant_ops.c @@ -314,6 +314,27 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
+static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(type == src3->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + assert(type == HLSL_TYPE_FLOAT || type == HLSL_TYPE_HALF); + + dst->u[0].f = src3->value.u[0].f; + for (k = 0; k < src1->node.data_type->dimx; ++k) + { + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + } + + return true; +} + static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct vkd3d_shader_location *loc) @@ -564,7 +585,7 @@ static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL; + struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; struct hlsl_constant_value res = {0}; struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; @@ -592,6 +613,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node); + if (expr->operands[2].node) + arg3 = hlsl_ir_constant(expr->operands[2].node);
switch (expr->op) { @@ -651,6 +674,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); break;
+ case HLSL_OP3_DP2ADD: + success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); + break; + default: FIXME("Fold \"%s\" expression.\n", debug_hlsl_expr_op(expr->op)); success = false;
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl_constant_ops.c:
return true;
}
+static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type,
const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3)
+{
- enum hlsl_base_type type = dst_type->base_type;
- unsigned int k;
- assert(type == src1->node.data_type->base_type);
- assert(type == src2->node.data_type->base_type);
- assert(type == src3->node.data_type->base_type);
- assert(src1->node.data_type->dimx == src2->node.data_type->dimx);
- assert(type == HLSL_TYPE_FLOAT || type == HLSL_TYPE_HALF);
Perhaps we should also assert that `src3->node.data_type->dimx == 1`, since only `src3->value.u[0]` is read.
This merge request was approved by Francisco Casas.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_constant_ops.c:
return true;
}
+static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type,
const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2)
+{
- enum hlsl_base_type type = dst_type->base_type;
- unsigned int k;
- assert(type == src1->node.data_type->base_type);
- assert(type == src2->node.data_type->base_type);
- assert(src1->node.data_type->dimx == src2->node.data_type->dimx);
- assert(type == HLSL_TYPE_FLOAT || type == HLSL_TYPE_HALF);
This is not wrong, given that at this point all dot operations on other types should already have been lowered, but it is inconsistent with the other operations, for which unknown types are just logged with `FIXME()` and ignored. I think it would be better to do the same here, so that if this pass is ever used somewhere other than `hlsl_emit_bytecode()` we don't `assert()` for no real reason.