Signed-off-by: Nikolay Sivov nsivov@codeweavers.com
-- v4: vkd3d-shader/hlsl: Add constant folding for the ternary operator. vkd3d-shader/hlsl: Use conditional moves for arithmetic operators instead of branching.
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 35 +++++-------------- .../hlsl/arithmetic-float-uniform.shader_test | 14 ++++++++ tests/hlsl/arithmetic-float.shader_test | 13 +++++++ tests/hlsl/arithmetic-int-uniform.shader_test | 19 ++++++++++ tests/hlsl/arithmetic-int.shader_test | 14 ++++++++ 5 files changed, 69 insertions(+), 26 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 132f44a2a..69fa930b5 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2474,36 +2474,19 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { - struct hlsl_block then_block, else_block; - struct hlsl_ir_node *iff, *store; - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_ir_node *cond;
assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type));
- if (!(var = hlsl_new_synthetic_var(ctx, "conditional", if_true->data_type, &condition->loc))) - return NULL; - - hlsl_block_init(&then_block); - hlsl_block_init(&else_block); - - if (!(store = hlsl_new_simple_store(ctx, var, if_true))) - return NULL; - hlsl_block_add_instr(&then_block, store); - - if (!(store = hlsl_new_simple_store(ctx, var, if_false))) - return NULL; - hlsl_block_add_instr(&else_block, store); - - if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) - return NULL; - hlsl_block_add_instr(instrs, iff); - - if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) - return NULL; - hlsl_block_add_instr(instrs, &load->node); + operands[0] = condition; + operands[1] = if_true; + operands[2] = if_false; + if (!(cond = hlsl_new_expr(ctx, HLSL_OP3_TERNARY, operands, if_true->data_type, &condition->loc))) + return false; + hlsl_block_add_instr(instrs, cond);
- return &load->node; + return cond; }
static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) diff --git a/tests/hlsl/arithmetic-float-uniform.shader_test b/tests/hlsl/arithmetic-float-uniform.shader_test index 708dc2704..f022ac79f 100644 --- a/tests/hlsl/arithmetic-float-uniform.shader_test +++ b/tests/hlsl/arithmetic-float-uniform.shader_test @@ -58,6 +58,20 @@ uniform 0 float4 45.0 5.0 0.0 0.0 draw quad probe all rgba (0.0, 0.0, 0.0, 0.0)
+[pixel shader] +float4 x, y; + +float4 main() : sv_target +{ + return x % y; +} + +[test] +uniform 0 float4 5.0 -42.1 4.0 45.0 +uniform 4 float4 15.0 -5.0 4.1 5.0 +draw quad +probe all rgba (5.0, -2.1, 4.0, 0.0) 4 + [require] % Infinities are not allowed in SM1 shader model >= 4.0 diff --git a/tests/hlsl/arithmetic-float.shader_test b/tests/hlsl/arithmetic-float.shader_test index 558d5d100..73bd627c0 100644 --- a/tests/hlsl/arithmetic-float.shader_test +++ b/tests/hlsl/arithmetic-float.shader_test @@ -50,6 +50,19 @@ float4 main() : SV_TARGET draw quad probe all rgba (0.0, 0.0, 0.0, 0.0)
+[pixel shader] +float4 main() : sv_target +{ + float4 x = {5.0, -42.1, 4.0, 45.0}; + float4 y = {15.0, -5.0, 4.1, 5.0}; + + return x % y; +} + +[test] +draw quad +probe all rgba (5.0, -2.1, 4.0, 0.0) 4 + [require] % Infinities are not allowed in SM1 shader model >= 4.0 diff --git a/tests/hlsl/arithmetic-int-uniform.shader_test b/tests/hlsl/arithmetic-int-uniform.shader_test index bd35f5660..b2bf720fc 100644 --- a/tests/hlsl/arithmetic-int-uniform.shader_test +++ b/tests/hlsl/arithmetic-int-uniform.shader_test @@ -100,3 +100,22 @@ float4 main() : SV_TARGET uniform 0 float4 5.0 -7.0 0.0 -10.0 draw quad probe all rgba (5.0, 7.0, 0.0, 10.0) + +[pixel shader] +uniform float4 a; +uniform float4 b; + +float4 main() : sv_target +{ + int2 x = a.xz; + int2 y = a.yw; + int2 z = b.xy; + int2 w = b.zw; + return float4(x / y, z % w); +} + +[test] +uniform 0 float4 45.0 5.0 50.0 10.0 +uniform 4 float4 3.0 8.0 2.0 5.0 +draw quad +probe all rgba (9.0, 5.0, 1.0, 3.0) diff --git a/tests/hlsl/arithmetic-int.shader_test b/tests/hlsl/arithmetic-int.shader_test index f4c989041..c97022099 100644 --- a/tests/hlsl/arithmetic-int.shader_test +++ b/tests/hlsl/arithmetic-int.shader_test @@ -109,3 +109,17 @@ float4 main() : SV_TARGET [test] draw quad probe all rgba (-2147483648.0, -2147483648.0, -2147483648.0, -2147483648.0) + +[pixel shader] +float4 main() : sv_target +{ + int2 x = {5, 15}; + int2 y = {2, 5}; + int2 z = {3, 8}; + + return float4(x / y, z % y); +} + +[test] +draw quad +probe all rgba (2.0, 3.0, 1.0, 3.0)
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_constant_ops.c | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d-shader/hlsl_constant_ops.c index 41a72ab6c..5d519ddbe 100644 --- a/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d-shader/hlsl_constant_ops.c @@ -869,6 +869,37 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c return true; }
+static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) +{ + unsigned int k; + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (src1->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].u ? src2->value.u[k].f : src3->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].u ? src2->value.u[k].d : src3->value.u[k].d; + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u ? src2->value.u[k].u : src3->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + } + return true; +} + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; @@ -990,6 +1021,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); break;
+ case HLSL_OP3_TERNARY: + success = fold_ternary(ctx, &res, instr->data_type, arg1, arg2, arg3); + break; + default: FIXME("Fold "%s" expression.\n", debug_hlsl_expr_op(expr->op)); success = false;
Switched all of the operators to use conditional move, updated the tests too.
The constant folding is right for MOVC but wrong for TERNARY, since TERNARY does arithmetic comparison.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_constant_ops.c:
return true;
}
+static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type,
const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3)
+{
- unsigned int k;
- for (k = 0; k < dst_type->dimx; ++k)
- {
switch (src1->node.data_type->base_type)
{
case HLSL_TYPE_FLOAT:
case HLSL_TYPE_HALF:
dst->u[k].u = src1->value.u[k].u ? src2->value.u[k].f : src3->value.u[k].f;
I guess this should be something like `dst->u[k].u = (src1->value.u[k].f != 0.0f) ? src2->value.u[k].u : src3->value.u[k].u`.
In particular, the first operand should take the `float` value and compare it to `0.0f`, so that `-0.0f` is also considered equal to zero. The other two operands should take the unsigned part, so that there is no implicit cast when assigning to the destination's unsigned part.
Same for the double below.
On Wed Sep 13 09:01:07 2023 +0000, Giovanni Mascellani wrote:
I guess this should be something like `dst->u[k].u = (src1->value.u[k].f != 0.0f) ? src2->value.u[k].u : src3->value.u[k].u`. In particular, the first operand should take the `float` value and compare it to `0.0f`, so that `-0.0f` is also considered equal to zero. The other two operands should take the unsigned part, so that there is no implicit cast when assigning to the destination's unsigned part. Same for the double below.
I stand corrected! The correct version is probably rather: `dst->u[k] = (src1->value.u[k].f != 0.0f) ? src2->value.u[k] : src3->value.u[k]`. IOW, we have to copy the `union`, not the unsigned field, because the data type of the second and third operand might be larger than an unsigned (i.e., it might be a double).
Also, similarly to other `fold_*()` helpers, you might assert that the type of the second and third operand coincides with the type of the result.
On Wed Sep 13 09:08:35 2023 +0000, Giovanni Mascellani wrote:
I stand corrected! The correct version is probably rather: `dst->u[k] = (src1->value.u[k].f != 0.0f) ? src2->value.u[k] : src3->value.u[k]`. IOW, we have to copy the `union`, not the unsigned field, because the data type of the second and third operand might be larger than an unsigned (i.e., it might be a double). Also, similarly to other `fold_*()` helpers, you might assert that the type of the second and third operand coincides with the type of the result.
Right, source/destination type is not affected by selector type. Thanks.