Signed-off-by: Nikolay Sivov nsivov@codeweavers.com
-- v4: vkd3d-shader: Add constant folding for 'dp2add' operation. vkd3d-shader: Add constant folding for the 'dot' operation.
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_constant_ops.c | 32 +++++++++++++++++++++++++++ tests/hlsl/dot.shader_test | 13 +++++++++++ 2 files changed, 45 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d-shader/hlsl_constant_ops.c index 01c438ae2..5501b6e9c 100644 --- a/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d-shader/hlsl_constant_ops.c @@ -348,6 +348,34 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; }
+static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + + dst->u[0].f = 0.0f; + for (k = 0; k < src1->node.data_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + break; + default: + FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct vkd3d_shader_location *loc) @@ -788,6 +816,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); break;
+ case HLSL_OP2_DOT: + success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); + break; + case HLSL_OP2_DIV: success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; diff --git a/tests/hlsl/dot.shader_test b/tests/hlsl/dot.shader_test index e51d7cb8c..15f120f70 100644 --- a/tests/hlsl/dot.shader_test +++ b/tests/hlsl/dot.shader_test @@ -74,6 +74,19 @@ uniform 4 float4 3.0 0.0 0.0 0.0 draw quad probe all rgba (6.0, 6.0, 6.0, 6.0)
+[pixel shader] +static const float4 x = float4(2.0, 3.0, 4.0, 5.0); +static const float4 y = float4(6.0, 7.0, 8.0, 9.0); + +float4 main() : sv_target +{ + return dot(x, y); +} + +[test] +draw quad +probe all rgba (110.0, 110.0, 110.0, 110.0) + [pixel shader fail] uniform float1x1 x; uniform float4 y;
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_constant_ops.c | 38 ++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d-shader/hlsl_constant_ops.c index 5501b6e9c..8b191de98 100644 --- a/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d-shader/hlsl_constant_ops.c @@ -376,6 +376,36 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
+static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(type == src3->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + assert(src3->node.data_type->dimx == 1); + + dst->u[0].f = src3->value.u[0].f; + for (k = 0; k < src1->node.data_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + break; + default: + FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct vkd3d_shader_location *loc) @@ -751,7 +781,7 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL; + struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; struct hlsl_constant_value res = {0}; struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; @@ -779,6 +809,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node); + if (expr->operands[2].node) + arg3 = hlsl_ir_constant(expr->operands[2].node);
switch (expr->op) { @@ -856,6 +888,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); break;
+ case HLSL_OP3_DP2ADD: + success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); + break; + default: FIXME("Fold "%s" expression.\n", debug_hlsl_expr_op(expr->op)); success = false;