-- v7: tests: Add a test for fmod() with vector arguments. vkd3d-shader: Use ternary operator in fmod() implementation. vkd3d-shader/tpf: Use 'movc' to implement ternary operator.
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl.c | 2 ++ libs/vkd3d-shader/hlsl.h | 3 ++ libs/vkd3d-shader/hlsl.y | 6 +++- libs/vkd3d-shader/hlsl_codegen.c | 50 ++++++++++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 25 ++++++++++++++++ tests/hlsl/ternary.shader_test | 15 ++++++++++ 6 files changed, 100 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 8b706e1e..14944a2c 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2467,6 +2467,8 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
[HLSL_OP3_DP2ADD] = "dp2add", [HLSL_OP3_LERP] = "lerp", + [HLSL_OP3_MOVC] = "movc", + [HLSL_OP3_TERNARY] = "ternary", };
return op_names[op]; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 8bc72a8a..ad8add2f 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -552,6 +552,9 @@ enum hlsl_ir_expr_op
HLSL_OP3_DP2ADD, HLSL_OP3_LERP, + /* TERNARY is used specifically for ternary operator, and later lowered according to profile to e.g. MOVC. */ + HLSL_OP3_MOVC, + HLSL_OP3_TERNARY, };
#define HLSL_MAX_OPERANDS 3 diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 73257b8e..79f5ee38 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -6632,6 +6632,7 @@ conditional_expr: struct hlsl_ir_node *cond = node_from_block($1); struct hlsl_ir_node *first = node_from_block($3); struct hlsl_ir_node *second = node_from_block($5); + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = { 0 }; struct hlsl_type *common_type;
hlsl_block_add_block($1, $3); @@ -6648,7 +6649,10 @@ conditional_expr: if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) YYABORT;
- if (!hlsl_add_conditional(ctx, $1, cond, first, second)) + args[0] = cond; + args[1] = first; + args[2] = second; + if (!add_expr(ctx, $1, HLSL_OP3_TERNARY, args, common_type, &@1)) YYABORT; $$ = $1; } diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index bae8e5f9..e572a4b4 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2396,6 +2396,54 @@ static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * return true; }
+/* Use 'movc' for the ternary operator. */ +static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], *replacement; + struct hlsl_ir_node *zero, *cond, *first, *second; + struct hlsl_constant_value zero_value = { 0 }; + struct hlsl_ir_expr *expr; + struct hlsl_type *type; + + if (instr->type != HLSL_IR_EXPR) + return false; + + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP3_TERNARY) + return false; + + cond = expr->operands[0].node; + first = expr->operands[1].node; + second = expr->operands[2].node; + + if (cond->data_type->base_type == HLSL_TYPE_FLOAT) + { + if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) + return false; + list_add_tail(&instr->entry, &zero->entry); + + memset(operands, 0, sizeof(operands)); + operands[0] = zero; + operands[1] = cond; + type = cond->data_type; + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); + if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) + return false; + list_add_before(&instr->entry, &cond->entry); + } + + memset(operands, 0, sizeof(operands)); + operands[0] = cond; + operands[1] = first; + operands[2] = second; + if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) + return false; + list_add_before(&instr->entry, &replacement->entry); + + hlsl_replace_node(instr, replacement); + return true; +} + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_type *type = instr->data_type, *arg_type; @@ -4354,6 +4402,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); sort_synthetic_separated_samplers_first(ctx);
+ if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_ternary, body, NULL); if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 7949be15..045fb6c5 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4341,6 +4341,26 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t write_sm4_instruction(tpf, &instr); }
+static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, + const struct hlsl_ir_node *src3) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); + sm4_src_from_node(&instr.srcs[2], src3, instr.dsts[0].writemask); + instr.src_count = 3; + + write_sm4_instruction(tpf, &instr); +} + static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, @@ -4702,6 +4722,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_ir_node *arg1 = expr->operands[0].node; const struct hlsl_ir_node *arg2 = expr->operands[1].node; + const struct hlsl_ir_node *arg3 = expr->operands[2].node; const struct hlsl_type *dst_type = expr->node.data_type; struct vkd3d_string_buffer *dst_type_string;
@@ -5127,6 +5148,10 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex &expr->node, arg1, arg2); break;
+ case HLSL_OP3_MOVC: + write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); + break; + default: hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); } diff --git a/tests/hlsl/ternary.shader_test b/tests/hlsl/ternary.shader_test index 99fee241..91f49e0e 100644 --- a/tests/hlsl/ternary.shader_test +++ b/tests/hlsl/ternary.shader_test @@ -46,3 +46,18 @@ float4 main() : sv_target uniform 0 float4 1.0 0.0 0.0 0.0 draw quad probe all rgba (0.5, 0.6, 0.7, 0.0) + +[pixel shader] +float4 x, y, z; + +float4 main() : sv_target +{ + return x ? y : z; +} + +[test] +uniform 0 float4 0.0 1.0 0.0 -3.0 +uniform 4 float4 1.0 2.0 3.0 4.0 +uniform 8 float4 5.0 6.0 7.0 8.0 +draw quad +probe all rgba (5.0, 2.0, 7.0, 4.0)
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl.y | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 79f5ee38..3ff54493 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2827,6 +2827,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *x, *y, *div, *abs, *frac, *neg_frac, *ge, *select, *zero; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; static const struct hlsl_constant_value zero_value;
if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) @@ -2854,7 +2855,10 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) return false;
- if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) + operands[0] = ge; + operands[1] = frac; + operands[2] = neg_frac; + if (!(select = add_expr(ctx, params->instrs, HLSL_OP3_TERNARY, operands, x->data_type, loc))) return false;
return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc);
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- tests/hlsl/fmod.shader_test | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
diff --git a/tests/hlsl/fmod.shader_test b/tests/hlsl/fmod.shader_test index 4c9ff988..9eb69c3b 100644 --- a/tests/hlsl/fmod.shader_test +++ b/tests/hlsl/fmod.shader_test @@ -11,3 +11,17 @@ probe all rgba (-0.5, 0.0, 0.0, 0.0) 4 uniform 0 float4 1.1 0.3 0.0 0.0 draw quad probe all rgba (0.2, 0.0, 0.0, 0.0) 4 + +[pixel shader] +float4 main(uniform float4 u) : sv_target +{ + return float4(fmod(u.xy, u.z), 0, 0); +} + +[test] +uniform 0 float4 -0.5 6.5 2.0 0.0 +draw quad +probe all rgba (-0.5, 0.5, 0.0, 0.0) 4 +uniform 0 float4 1.1 0.3 3.0 0.0 +draw quad +probe all rgba (1.1, 0.3, 0.0, 0.0) 4
On Wed Aug 30 13:09:10 2023 +0000, Giovanni Mascellani wrote:
It's useful to have a header which a new contributor can understand even without having to read the fine implementation details. Adding a short documentation paragraph doesn't sound hard and it provides significant added value.
I pushed something, please see if that's what you had in mind.
On Wed Aug 30 20:43:02 2023 +0000, Nikolay Sivov wrote:
I pushed something, please see if that's what you had in mind.
Mmh, that still doesn't explain what each of them does. I was rather thinking at something like: "`MOVC` selects its third operand if the first one is bitwise zero, the second operand otherwise; `TERNARY` selects its third operand if the first one compares equal to zero (even if its not bitwise zero, like a floating point negative zero), the second operand otherwise".
On Fri Sep 1 14:30:31 2023 +0000, Giovanni Mascellani wrote:
Mmh, that still doesn't explain what each of them does. I was rather thinking at something like: "`MOVC` selects its third operand if the first one is bitwise zero, the second operand otherwise; `TERNARY` selects its third operand if the first one compares equal to zero (even if its not bitwise zero, like a floating point negative zero), the second operand otherwise".
Other operations are not really explained either, and this is no more complicated than looking up what DP2ADD does or why it exists.
My question is, if there is a consensus, @hverbeet, @zfigura, @fcasas, and last but not least @Mystral, I can surely adjust or expand on the comments. But is that really blocking?
On Fri Sep 1 14:40:50 2023 +0000, Nikolay Sivov wrote:
Other operations are not really explained either, and this is no more complicated than looking up what DP2ADD does or why it exists. My question is, if there is a consensus, @hverbeet, @zfigura, @fcasas, and last but not least @Mystral, I can surely adjust or expand on the comments. But is that really blocking?
For sure I won't be the one fighting against more documentation, but to me all the other opcodes are rather self-explaining. `MOVC` and `TERNARY` are pretty self-explaining too, individually, but I find it less obvious to have two things which are so similar to each other. That's the bit that IMHO deserves some explanation.
That said, if the consensus is just an explanation is excessive I won't hold a grudge against anybody. I'm just saying my opinion. Also, it doesn't look like a hard request.
On Fri Sep 1 15:03:52 2023 +0000, Giovanni Mascellani wrote:
For sure I won't be the one fighting against more documentation, but to me all the other opcodes are rather self-explaining. `MOVC` and `TERNARY` are pretty self-explaining too, individually, but I find it less obvious to have two things which are so similar to each other. That's the bit that IMHO deserves some explanation. That said, if the consensus is just an explanation is excessive I won't hold a grudge against anybody. I'm just saying my opinion. Also, it doesn't look like a hard request.
I don't really know. I'm accepting the patch, but I won't complain if someone improves the documentation later.
This merge request was approved by Zebediah Figura.
Other operations are not really explained either, and this is no more complicated than looking up what DP2ADD does or why it exists.
My question is, if there is a consensus, @hverbeet, @zfigura, @fcasas, and last but not least @Mystral, I can surely adjust or expand on the comments. But is that really blocking?
In general I think we could do with a bit more documentation in that area, yes. For example, it's fairly obvious that HLSL_OP3_LERP takes 3 arguments, and it seems safe enough to assume it does a linear interpolation between two of them, but I don't think it's necessarily immediately obvious which argument is the start, end, or interpolation factor.
On Tue Sep 5 03:20:25 2023 +0000, Zebediah Figura wrote:
I don't really know. I'm accepting the patch, but I won't complain if someone improves the documentation later.
If that can help unblocking the situation, I can accept the patch too and later add the documentation myself.
This merge request was approved by Giovanni Mascellani.
This merge request was approved by Henri Verbeet.