From: Nikolay Sivov <nsivov@codeweavers.com>
This is correct for pixel shaders; vertex shaders do not have 'cmp'.
---
 libs/vkd3d-shader/d3dbc.c        | 32 ++++++++++++++++++
 libs/vkd3d-shader/hlsl.c         |  1 +
 libs/vkd3d-shader/hlsl.h         |  5 ++-
 libs/vkd3d-shader/hlsl_codegen.c | 56 ++++++++++++++++++++++----------
 4 files changed, 75 insertions(+), 19 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c
index 598b75183..7ac3ffd08 100644
--- a/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d-shader/d3dbc.c
@@ -1805,6 +1805,34 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w
     src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask);
 }
 
+static void write_sm1_cmp(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2,
+        const struct hlsl_reg *src3)
+{
+    struct sm1_instruction instr =
+    {
+        .opcode = D3DSIO_CMP,
+
+        .dst.type = D3DSPR_TEMP,
+        .dst.writemask = dst->writemask,
+        .dst.reg = dst->id,
+        .has_dst = 1,
+
+        .srcs[0].type = D3DSPR_TEMP,
+        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask),
+        .srcs[0].reg = src1->id,
+        .srcs[1].type = D3DSPR_TEMP,
+        .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask),
+        .srcs[1].reg = src2->id,
+        .srcs[2].type = D3DSPR_TEMP,
+        .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask),
+        .srcs[2].reg = src3->id,
+        .src_count = 3,
+    };
+
+    write_sm1_instruction(ctx, buffer, &instr);
+}
+
 static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
         const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2,
         const struct hlsl_reg *src3)
@@ -2199,6 +2227,10 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
             }
             break;
 
+        case HLSL_OP3_CMP:
+            write_sm1_cmp(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);
+            break;
+
         case HLSL_OP3_DP2ADD:
             write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);
             break;
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
index 5fe9047bf..33ece0493 100644
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -2485,6 +2485,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
         [HLSL_OP2_NEQUAL]       = "!=",
         [HLSL_OP2_RSHIFT]       = ">>",
 
+        [HLSL_OP3_CMP]          = "cmp",
         [HLSL_OP3_DP2ADD]       = "dp2add",
         [HLSL_OP3_MOVC]         = "movc",
         [HLSL_OP3_TERNARY]      = "ternary",
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index 44cebaaf6..f4120f8a3 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -556,7 +556,10 @@ enum hlsl_ir_expr_op
     /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise.
      * TERNARY(a, b, c) returns c if a == 0 and b otherwise.
      * They differ for floating point numbers, because
-     * -0.0 == 0.0, but it is not bitwise zero. */
+     * -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b
+     * if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while
+     * SM4+ is using MOVC in such cases. */
+    HLSL_OP3_CMP,
     HLSL_OP3_MOVC,
     HLSL_OP3_TERNARY,
 };
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 76572cf93..a1b80effd 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -2435,7 +2435,7 @@ static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct
 /* Use 'movc' for the ternary operator. */
 static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 {
-    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], *replacement;
+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement;
     struct hlsl_ir_node *zero, *cond, *first, *second;
     struct hlsl_constant_value zero_value = { 0 };
     struct hlsl_ir_expr *expr;
@@ -2452,28 +2452,49 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
     first = expr->operands[1].node;
     second = expr->operands[2].node;
 
-    if (cond->data_type->base_type == HLSL_TYPE_FLOAT)
+    if (ctx->profile->major_version < 4)
     {
-        if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc)))
+        struct hlsl_ir_node *abs, *neg;
+
+        if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc)))
             return false;
-        hlsl_block_add_instr(block, zero);
+        hlsl_block_add_instr(block, abs);
+
+        if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc)))
+            return false;
+        hlsl_block_add_instr(block, neg);
+
+        operands[0] = neg;
+        operands[1] = second;
+        operands[2] = first;
+        if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc)))
+            return false;
+    }
+    else
+    {
+        if (cond->data_type->base_type == HLSL_TYPE_FLOAT)
+        {
+            if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, zero);
+
+            operands[0] = zero;
+            operands[1] = cond;
+            type = cond->data_type;
+            type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy);
+            if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, cond);
+        }
 
         memset(operands, 0, sizeof(operands));
-        operands[0] = zero;
-        operands[1] = cond;
-        type = cond->data_type;
-        type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy);
-        if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc)))
+        operands[0] = cond;
+        operands[1] = first;
+        operands[2] = second;
+        if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc)))
             return false;
-        hlsl_block_add_instr(block, cond);
     }
 
-    memset(operands, 0, sizeof(operands));
-    operands[0] = cond;
-    operands[1] = first;
-    operands[2] = second;
-    if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc)))
-        return false;
     hlsl_block_add_instr(block, replacement);
     return true;
 }
@@ -4422,8 +4443,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
     hlsl_transform_ir(ctx, track_object_components_usage, body, NULL);
    sort_synthetic_separated_samplers_first(ctx);
 
-    if (profile->major_version >= 4)
-        lower_ir(ctx, lower_ternary, body);
+    lower_ir(ctx, lower_ternary, body);
     if (profile->major_version < 4)
     {
         lower_ir(ctx, lower_division, body);
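For reference, the lowering above leans on the CMP semantics documented in the hlsl.h hunk: `cond ? first : second` is emitted as `CMP(-abs(cond), second, first)`, and `-abs(cond) >= 0` holds exactly when `cond` is (positive or negative) zero. A minimal standalone sanity check of that reasoning, in plain C rather than vkd3d code (`cmp_scalar()` and `lowered_ternary()` are illustrative names, and NaN conditions are ignored):

```c
#include <assert.h>
#include <math.h>

/* CMP(a, b, c) as described in the hlsl.h comment: b if a >= 0, c otherwise. */
static float cmp_scalar(float a, float b, float c)
{
    return a >= 0.0f ? b : c;
}

/* The SM1-SM3 lowering of "cond ? first : second" built by lower_ternary(). */
static float lowered_ternary(float cond, float first, float second)
{
    return cmp_scalar(-fabsf(cond), second, first);
}

int main(void)
{
    static const float conds[] = {0.0f, -0.0f, 1.0f, -1.0f, 0.5f, -123.0f};
    unsigned int i;

    for (i = 0; i < sizeof(conds) / sizeof(*conds); ++i)
    {
        float expected = conds[i] != 0.0f ? 10.0f : 20.0f;

        assert(lowered_ternary(conds[i], 10.0f, 20.0f) == expected);
    }
    return 0;
}
```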
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/d3dbc.c:
+        .has_dst = 1,
+
+        .srcs[0].type = D3DSPR_TEMP,
+        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask),
+        .srcs[0].reg = src1->id,
+        .srcs[1].type = D3DSPR_TEMP,
+        .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask),
+        .srcs[1].reg = src2->id,
+        .srcs[2].type = D3DSPR_TEMP,
+        .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask),
+        .srcs[2].reg = src3->id,
+        .src_count = 3,
+    };
+
+    write_sm1_instruction(ctx, buffer, &instr);
+}
I would expect some `sm1_map_src_swizzle()` call at the end, like `write_sm1_unary_op()` and `write_sm1_binary_op()`. Or is `CMP` special for some reason? If not, I would actually rather create the `write_sm1_ternary_op()` helper, similar to the first two, and use that.
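For what it's worth, such a `write_sm1_ternary_op()` might look roughly like the sketch below: it is `write_sm1_cmp()` from the patch with the opcode parameterized and a `sm1_map_src_swizzle()` call per source, as the unary/binary helpers do. The exact signature, in particular the opcode parameter type, is assumed here rather than taken from d3dbc.c:

```c
static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
        const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3)
{
    struct sm1_instruction instr =
    {
        .opcode = opcode,

        .dst.type = D3DSPR_TEMP,
        .dst.writemask = dst->writemask,
        .dst.reg = dst->id,
        .has_dst = 1,

        .srcs[0].type = D3DSPR_TEMP,
        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask),
        .srcs[0].reg = src1->id,
        .srcs[1].type = D3DSPR_TEMP,
        .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask),
        .srcs[1].reg = src2->id,
        .srcs[2].type = D3DSPR_TEMP,
        .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask),
        .srcs[2].reg = src3->id,
        .src_count = 3,
    };

    /* Remap each source swizzle onto the components selected by the
     * destination writemask, like the unary/binary helpers do. */
    sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask);
    sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask);
    sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask);

    write_sm1_instruction(ctx, buffer, &instr);
}
```

The `HLSL_OP3_CMP` case in `write_sm1_expr()` would then reduce to a single `write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);` call.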
On Wed Sep 27 11:50:55 2023 +0000, Giovanni Mascellani wrote:
I would expect some `sm1_map_src_swizzle()` call at the end, like `write_sm1_unary_op()` and `write_sm1_binary_op()`. Or is `CMP` special for some reason? If not, I would actually rather create the `write_sm1_ternary_op()` helper, similar to the first two, and use that.
How do I tell whether the mapping is necessary?
On Wed Sep 27 17:19:41 2023 +0000, Nikolay Sivov wrote:
How do I tell whether the mapping is necessary?
It is necessary for all componentwise operations, so that the source registers' swizzles are remapped onto the components actually selected by the destination register's writemask, instead of defaulting to the first `dimx` components. That includes `CMP` and `MOVC`. Dot products, on the other hand, are an exception because they are not componentwise operations: all of the source components contribute to the single scalar destination.
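To put that in concrete terms with a toy model (plain C, not vkd3d's actual swizzle/writemask encoding; `map_swizzle_toy()` is a made-up name): if the destination writemask enables only `.y` and `.z`, the source components have to end up in the `.y` and `.z` swizzle slots, instead of sitting in the first `dimx` slots where a componentwise instruction would never read them.

```c
#include <stdio.h>

/* Toy model of the remapping done by sm1_map_src_swizzle()/hlsl_map_swizzle():
 * distribute the source components, which by default occupy the first dimx
 * swizzle slots, over the slots enabled in the destination writemask.
 * Components 0..3 stand for .x/.y/.z/.w. */
static void map_swizzle_toy(const unsigned int *src, unsigned int dimx,
        const int *writemask, unsigned int *mapped)
{
    unsigned int i, next = 0;

    for (i = 0; i < 4; ++i)
    {
        /* Slots that are not written keep a don't-care component. */
        mapped[i] = (writemask[i] && next < dimx) ? src[next++] : src[0];
    }
}

int main(void)
{
    static const char comps[] = "xyzw";
    /* A two-component source selecting .w and .x, written through a
     * destination writemask of .yz. */
    static const unsigned int src[2] = {3, 0};
    static const int writemask[4] = {0, 1, 1, 0};
    unsigned int mapped[4], i;

    map_swizzle_toy(src, 2, writemask, mapped);

    for (i = 0; i < 4; ++i)
    {
        if (writemask[i])
            printf("dst.%c reads src.%c\n", comps[i], comps[mapped[i]]);
    }
    return 0;
}
```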