Signed-off-by: Nikolay Sivov nsivov@codeweavers.com
-- v3: vkd3d-shader: Add constant folding for 'dp2add' operation. vkd3d-shader: Add constant folding for the 'dot' operation.
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_constant_ops.c | 32 +++++++++++++++++++++++++++ tests/hlsl-dot.shader_test | 13 +++++++++++ 2 files changed, 45 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d-shader/hlsl_constant_ops.c index 301113c84..fc8bf0370 100644 --- a/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d-shader/hlsl_constant_ops.c @@ -294,6 +294,34 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c return true; }
+static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + + dst->u[0].f = 0.0f; + for (k = 0; k < src1->node.data_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + break; + default: + FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct vkd3d_shader_location *loc) @@ -599,6 +627,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); break;
+ case HLSL_OP2_DOT: + success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); + break; + case HLSL_OP2_DIV: success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; diff --git a/tests/hlsl-dot.shader_test b/tests/hlsl-dot.shader_test index e51d7cb8c..15f120f70 100644 --- a/tests/hlsl-dot.shader_test +++ b/tests/hlsl-dot.shader_test @@ -74,6 +74,19 @@ uniform 4 float4 3.0 0.0 0.0 0.0 draw quad probe all rgba (6.0, 6.0, 6.0, 6.0)
+[pixel shader] +static const float4 x = float4(2.0, 3.0, 4.0, 5.0); +static const float4 y = float4(6.0, 7.0, 8.0, 9.0); + +float4 main() : sv_target +{ + return dot(x, y); +} + +[test] +draw quad +probe all rgba (110.0, 110.0, 110.0, 110.0) + [pixel shader fail] uniform float1x1 x; uniform float4 y;
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_constant_ops.c | 38 ++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d-shader/hlsl_constant_ops.c index fc8bf0370..65ee15c36 100644 --- a/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d-shader/hlsl_constant_ops.c @@ -322,6 +322,36 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
+static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(type == src3->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + assert(src3->node.data_type->dimx == 1); + + dst->u[0].f = src3->value.u[0].f; + for (k = 0; k < src1->node.data_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + break; + default: + FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct vkd3d_shader_location *loc) @@ -572,7 +602,7 @@ static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL; + struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; struct hlsl_constant_value res = {0}; struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; @@ -600,6 +630,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node); + if (expr->operands[2].node) + arg3 = hlsl_ir_constant(expr->operands[2].node);
switch (expr->op) { @@ -659,6 +691,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); break;
+ case HLSL_OP3_DP2ADD: + success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); + break; + default: FIXME("Fold "%s" expression.\n", debug_hlsl_expr_op(expr->op)); success = false;
On Wed Jun 21 10:31:14 2023 +0000, Nikolay Sivov wrote:
changed this line in [version 3 of the diff](/wine/vkd3d/-/merge_requests/240/diffs?diff_id=53100&start_sha=7c67620b3183374086eada41f132152e90edca07#42d7bc823e29607ffee11286a3147a03c3ae6ce6_306_306)
Ok, pushed that.
On Wed Jun 21 10:31:14 2023 +0000, Nikolay Sivov wrote:
changed this line in [version 3 of the diff](/wine/vkd3d/-/merge_requests/240/diffs?diff_id=53100&start_sha=7c67620b3183374086eada41f132152e90edca07#42d7bc823e29607ffee11286a3147a03c3ae6ce6_327_335)
Done.
Thanks, that's ok for me. Note that we're not dealing with a lot of the complications that come with floating-point numbers. For example, SM1-3 doesn't allow infinities or NaNs (AFAIU), so I'm not sure what should happen if the dot folding results in one of those. However, most of the constant folding code doesn't care, so I'm ignoring the issue for the time being.
Also, we usually keep `switch` cases separated with newlines.
This merge request was approved by Giovanni Mascellani.
On Mon Jun 26 09:13:02 2023 +0000, Giovanni Mascellani wrote:
Thanks, that's ok for me. Note that we're not dealing with a lot of the complications that come with floating-point numbers. For example, SM1-3 doesn't allow infinities or NaNs (AFAIU), so I'm not sure what should happen if the dot folding results in one of those. However, most of the constant folding code doesn't care, so I'm ignoring the issue for the time being. Also, we usually keep `switch` cases separated with newlines.
Do we know how consistent the handling of special float values is? Maybe we should have some helpers for multiplication/addition that handle this depending on the profile.
On Mon Jun 26 09:13:02 2023 +0000, Nikolay Sivov wrote:
Do we know how consistent the handling of special float values is? Maybe we should have some helpers for multiplication/addition that handle this depending on the profile.
Personally I don't know.