v4: vkd3d-shader/hlsl: Support clip() intrinsic.
From: Nikolay Sivov <nsivov@codeweavers.com>

Signed-off-by: Nikolay Sivov <nsivov@codeweavers.com>
---
 Makefile.am                      |  1 +
 libs/vkd3d-shader/d3dbc.c        | 33 ++++++++++++++++
 libs/vkd3d-shader/hlsl.c         | 17 +++++++--
 libs/vkd3d-shader/hlsl.h         |  3 +-
 libs/vkd3d-shader/hlsl.y         | 64 ++++++++++++++++++++++++++++++--
 libs/vkd3d-shader/hlsl_codegen.c | 11 +++++-
 libs/vkd3d-shader/tpf.c          | 20 +++++---
 tests/hlsl-clip.shader_test      | 22 +++++++++++
 8 files changed, 155 insertions(+), 16 deletions(-)
 create mode 100644 tests/hlsl-clip.shader_test
diff --git a/Makefile.am b/Makefile.am
index 954b0ffd..1220fc09 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -79,6 +79,7 @@ vkd3d_shader_tests = \
 	tests/hlsl-attributes.shader_test \
 	tests/hlsl-bool-cast.shader_test \
 	tests/hlsl-clamp.shader_test \
+	tests/hlsl-clip.shader_test \
 	tests/hlsl-comma.shader_test \
 	tests/hlsl-cross.shader_test \
 	tests/hlsl-d3dcolor-to-ubyte4.shader_test \
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c
index c35f8ca0..7a044c4d 100644
--- a/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d-shader/d3dbc.c
@@ -1834,6 +1834,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
     }
 }
 
+static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
+{
+    const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
+
+    switch (jump->type)
+    {
+        case HLSL_IR_JUMP_DISCARD:
+        {
+            struct hlsl_reg *reg = &jump->arg.node->reg;
+
+            struct sm1_instruction instr =
+            {
+                .opcode = VKD3D_SM1_OP_TEXKILL,
+
+                .dst.type = D3DSPR_TEMP,
+                .dst.reg = reg->id,
+                .dst.writemask = reg->writemask,
+                .has_dst = 1,
+            };
+
+            write_sm1_instruction(ctx, buffer, &instr);
+            break;
+        }
+
+        default:
+            hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type));
+    }
+}
+
 static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
 {
     const struct hlsl_ir_load *load = hlsl_ir_load(instr);
@@ -2028,6 +2057,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
             write_sm1_expr(ctx, buffer, instr);
             break;
 
+        case HLSL_IR_JUMP:
+            write_sm1_jump(ctx, buffer, instr);
+            break;
+
         case HLSL_IR_LOAD:
             write_sm1_load(ctx, buffer, instr);
             break;
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
index 5bca84ba..7ad0b922 100644
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -1429,7 +1429,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v
 }
 
 struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type,
-        const struct vkd3d_shader_location *loc)
+        struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc)
 {
     struct hlsl_ir_jump *jump;
 
@@ -1437,6 +1437,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type
         return NULL;
     init_node(&jump->node, HLSL_IR_JUMP, NULL, loc);
     jump->type = type;
+    hlsl_src_from_node(&jump->arg, arg);
     return &jump->node;
 }
 
@@ -1587,9 +1588,16 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma
     return dst;
 }
 
-static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src)
+static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src)
 {
-    return hlsl_new_jump(ctx, src->type, &src->node.loc);
+    struct hlsl_ir_node *node;
+    struct hlsl_ir_jump *jump;
+
+    if (!(node = hlsl_new_jump(ctx, src->type, NULL, &src->node.loc)))
+        return NULL;
+    jump = hlsl_ir_jump(node);
+    clone_src(map, &jump->arg, &src->arg);
+    return &jump->node;
 }
 
 static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src)
@@ -1729,7 +1737,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
             return clone_index(ctx, map, hlsl_ir_index(instr));
 
         case HLSL_IR_JUMP:
-            return clone_jump(ctx, hlsl_ir_jump(instr));
+            return clone_jump(ctx, map, hlsl_ir_jump(instr));
 
         case HLSL_IR_LOAD:
             return clone_load(ctx, map, hlsl_ir_load(instr));
@@ -2697,6 +2705,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node)
 
 static void free_ir_jump(struct hlsl_ir_jump *jump)
 {
+    hlsl_src_remove(&jump->arg);
     vkd3d_free(jump);
 }
 
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index 24d431e4..ea88fb16 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -565,6 +565,7 @@ struct hlsl_ir_jump
 {
     struct hlsl_ir_node node;
     enum hlsl_ir_jump_type type;
+    struct hlsl_src arg;
 };
 
 struct hlsl_ir_swizzle
@@ -1118,7 +1119,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond
         struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc);
 struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc);
 struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx,
-        enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc);
+        enum hlsl_ir_jump_type type, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc);
 
 void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
 
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
index dae1851c..0b2db5f6 100644
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -421,7 +421,7 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_lis
 
     hlsl_block_init(&then_block);
 
-    if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc)))
+    if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc)))
         return false;
     hlsl_block_add_instr(&then_block, jump);
 
@@ -656,7 +656,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs,
         hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN,
                 "Void functions cannot return a value.");
     }
 
-    if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc)))
+    if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc)))
         return false;
     list_add_tail(instrs, &jump->entry);
 
@@ -2539,6 +2539,63 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx,
     return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc);
 }
 
+static bool intrinsic_clip(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *arg, *zero, *bool_false, *or, *cmp, *load, *discard;
+    unsigned int i, count;
+
+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
+        return false;
+
+    arg = params->args[0];
+
+    if (ctx->profile->major_version < 4 && hlsl_type_component_count(arg->data_type) > 4)
+    {
+        struct vkd3d_string_buffer *string;
+
+        if ((string = hlsl_type_to_string(ctx, arg->data_type)))
+            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+                    "Argument type cannot exceed 4 components, got type \"%s\".", string->buffer);
+        hlsl_release_string_buffer(ctx, string);
+        return false;
+    }
+
+    if (ctx->profile->major_version >= 4)
+    {
+        if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc)))
+            return false;
+        list_add_tail(params->instrs, &zero->entry);
+
+        if (!(cmp = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc)))
+            return false;
+
+        if (!(bool_false = hlsl_new_bool_constant(ctx, false, loc)))
+            return false;
+        list_add_tail(params->instrs, &bool_false->entry);
+
+        or = bool_false;
+
+        count = hlsl_type_component_count(cmp->data_type);
+        for (i = 0; i < count; ++i)
+        {
+            if (!(load = add_load_component(ctx, params->instrs, cmp, i, loc)))
+                return false;
+
+            if (!(or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, or, load, loc)))
+                return false;
+        }
+
+        arg = or;
+    }
+
+    if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, arg, loc)))
+        return false;
+    list_add_tail(params->instrs, &discard->entry);
+
+    return true;
+}
+
 static bool intrinsic_cos(struct hlsl_ctx *ctx,
         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -3482,6 +3539,7 @@ intrinsic_functions[] =
     {"asfloat",    1, true,  intrinsic_asfloat},
     {"asuint",    -1, true,  intrinsic_asuint},
     {"clamp",      3, true,  intrinsic_clamp},
+    {"clip",       1, true,  intrinsic_clip},
     {"cos",        1, true,  intrinsic_cos},
     {"cross",      2, true,  intrinsic_cross},
     {"ddx",        1, true,  intrinsic_ddx},
@@ -5626,7 +5684,7 @@ discard_statement:
 
             if (!($$ = make_empty_list(ctx)))
                 YYABORT;
-            if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1)))
+            if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, NULL, &@1)))
                 return false;
             list_add_tail($$, &discard->entry);
         }
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index bbb5223b..093073b2 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -655,7 +655,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx,
         return;
     list_add_after(&cf_instr->entry, &load->node.entry);
 
-    if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc)))
+    if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc)))
         return;
     hlsl_block_add_instr(&then_block, jump);
 
@@ -2709,8 +2709,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
             index->idx.node->last_read = last_read;
             break;
         }
-        case HLSL_IR_CONSTANT:
         case HLSL_IR_JUMP:
+        {
+            struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
+
+            if (jump->arg.node)
+                jump->arg.node->last_read = last_read;
+            break;
+        }
+        case HLSL_IR_CONSTANT:
             break;
     }
 }
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c
index 2166eb41..8462bea3 100644
--- a/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d-shader/tpf.c
@@ -4772,16 +4772,24 @@ static void write_sm4_jump(struct hlsl_ctx *ctx,
 
         case HLSL_IR_JUMP_DISCARD:
         {
-            struct sm4_register *reg = &instr.srcs[0].reg;
-
             instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ;
 
             memset(&instr.srcs[0], 0, sizeof(*instr.srcs));
-            instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
             instr.src_count = 1;
-            reg->type = VKD3D_SM4_RT_IMMCONST;
-            reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
-            reg->immconst_uint[0] = ~0u;
+
+            if (jump->arg.node)
+            {
+                sm4_src_from_node(&instr.srcs[0], jump->arg.node, VKD3DSP_WRITEMASK_ALL);
+            }
+            else
+            {
+                struct sm4_register *reg = &instr.srcs[0].reg;
+
+                instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
+                reg->type = VKD3D_SM4_RT_IMMCONST;
+                reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
+                reg->immconst_uint[0] = ~0u;
+            }
 
             break;
         }
diff --git a/tests/hlsl-clip.shader_test b/tests/hlsl-clip.shader_test
new file mode 100644
index 00000000..f9859e1b
--- /dev/null
+++ b/tests/hlsl-clip.shader_test
@@ -0,0 +1,22 @@
+[pixel shader]
+uniform float4 x;
+
+float4 main() : sv_target
+{
+    clip(x);
+    return x;
+}
+
+[test]
+uniform 0 float4 1 2 3 4
+draw quad
+probe all rgba (1, 2, 3, 4)
+uniform 0 float4 9 8 7 6
+draw quad
+probe all rgba (9, 8, 7, 6)
+uniform 0 float4 -1 8 7 6
+draw quad
+probe all rgba (9, 8, 7, 6)
+uniform 0 float4 9 0 7 6
+draw quad
+probe all rgba (9, 0, 7, 6)
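For reference, once the tree is built the new test can also be exercised on its own with the shader runner, e.g. `./tests/shader_runner tests/hlsl-clip.shader_test` from the build directory; the exact invocation depends on the build setup.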
On Sun May 28 08:38:09 2023 +0000, Nikolay Sivov wrote:
> changed this line in [version 4 of the diff](/wine/vkd3d/-/merge_requests/211/diffs?diff_id=49214&start_sha=a9fc203963a81bc95ef9c1ae9db54ac24a998ce3#65224f45149d44504eb7d8073b46ace7b7a7a4ce_352_352)
Yes, that was confusing.
Alright, I pushed something that implements conditional discard_nz. I'm sure it should be doable to move everything SM4-specific from clip() to some later pass, but that would involve sharing more expression helpers that are not otherwise used outside of hlsl.y.
On Sat May 27 09:36:58 2023 +0000, Nikolay Sivov wrote:
> Why is it a problem for the IR to differ? There is already a precedent in D3DCOLORtoUBYTE4(). For SM4 the input has to be reduced to a bool; for older versions we pass floats in. Do you mean that we could reference the argument of JUMP_DISCARD as-is, and run a pass for SM4+ to produce this float comparison condition?
The thing that bothers me is not really that the generated IR is different, but that the IR semantics themselves are different. Specifically, it seems that in SM4 an instruction of type `struct hlsl_ir_jump` means "jump if `arg`, which is supposed to be a scalar, is non-zero", while in SM1 it means "jump if `arg` has any negative component". This must be taken into consideration by all the downstream code, which is not a good idea in my opinion. Incidentally, `arg` could probably use a better name (e.g., `condition`?). Also, I would add a comment to `struct hlsl_ir_jump` specifying that `arg` is only considered for jumps of type `HLSL_IR_JUMP_DISCARD`, which is otherwise non-obvious.
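To make the suggestion concrete, the kind of annotation I have in mind would look something like the sketch below; the `condition` name and the comment wording are only illustrative, and the patch as posted keeps the name `arg`:

```c
struct hlsl_ir_jump
{
    struct hlsl_ir_node node;
    enum hlsl_ir_jump_type type;
    /* Only used for HLSL_IR_JUMP_DISCARD jumps; ignored for BREAK,
     * CONTINUE and RETURN jumps. */
    struct hlsl_src condition;
};
```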
On top of that, there is probably a decision that should at least be made more explicit here: when I proposed adding an IR operator corresponding to the C ternary operator, it was rejected because, if I understood correctly, we don't want to make the IR too rich, so that it's easier to write the optimizer (because there are fewer possible ways to express the same construct). So it is intended that HLSL `a ? b : c` must be translated to IR `if (a) { store(x, b); } else { store(x, c); } load(x)` rather than using an IR-level ternary operator. Here we've got essentially the same dilemma, so by the same principle we should avoid adding `arg` and rather use an `if` construct; the task of lowering that to a better TPF program would be delegated to some other optimizer that might in principle appear at some point in the future.
Now, I was against this decision at the time, so I should be in favor of what Nikolay is doing here. And in a sense I am, except that I feel we should be a bit more consistent about these design decisions. Is this case different from the ternary operator? Or did we decide that, since the lower-level optimizer doesn't exist (or, more precisely, while it doesn't exist yet), we agree that it makes sense for the current IR to be a little more flexible, so that some optimizations can still be done on it? That wouldn't be similar in spirit to !215.
On Mon May 29 10:08:54 2023 +0000, Giovanni Mascellani wrote:
> The thing that bothers me is not really that the generated IR is different, but that the IR semantics themselves are different. […]
Why does downstream code need to be aware of this? If the SM4+ condition is explicitly written out here, or later, it will only matter if we want to remove dead discard calls by evaluating their arguments differently. But even then we could scan for the resulting texkill/discard_nz instructions later and remove them there, rather than at the IR level.
Calling this a design decision is a bit too much; it's too minor for that. Having separate texkill and discard jump types wouldn't bother me, and it wouldn't add a new type of IR node, but it would still need separate paths depending on the profile.
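For concreteness, a split along those lines might look like the sketch below; neither of these discard values exists in the patch as posted, they are purely illustrative:

```c
enum hlsl_ir_jump_type
{
    HLSL_IR_JUMP_BREAK,
    HLSL_IR_JUMP_CONTINUE,
    /* Discard if any component of the argument is negative (sm1 texkill). */
    HLSL_IR_JUMP_DISCARD_NEG,
    /* Discard if the scalar argument is non-zero (sm4 discard_nz). */
    HLSL_IR_JUMP_DISCARD_NZ,
    HLSL_IR_JUMP_RETURN,
};
```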
> Why does downstream code need to be aware of this? […]
Downstream code needs to be aware because, as your proposal is currently written, `struct hlsl_ir_jump` has a different meaning depending on whether it's SM1 or SM4. So each compiler pass touching a `struct hlsl_ir_jump` will have to check whether it's an SM1 or an SM4 jump and possibly change its behavior accordingly.
On Mon May 29 11:39:54 2023 +0000, Giovanni Mascellani wrote:
> Downstream code needs to be aware because, as your proposal is currently written, `struct hlsl_ir_jump` has a different meaning depending on whether it's SM1 or SM4. […]
That depends on what the pass is going to do. The obvious approach I can think of is to have separate jump types if, e.g., we want to check for dead texkills/discards later; for that you have to know the meaning of the argument.
On Mon May 29 16:18:57 2023 +0000, Nikolay Sivov wrote:
> That depends on what the pass is going to do. […]
Yes, but the mere fact that it *can* happen is a problem for me, because it requires every developer to keep in mind that `struct hlsl_ir_jump` changes meaning depending on the SM, which I think is a bad idea. In particular, I don't want this to become a precedent for other IR instructions being assigned different meanings depending on the SM. I don't know whether this is already happening for floating-point rules, but if it is, it's already bad enough; and if it isn't, let's not start now.
On Thu Jun 1 13:36:54 2023 +0000, Giovanni Mascellani wrote:
> Yes, but the mere fact that it *can* happen is a problem for me, because it requires every developer to keep in mind that `struct hlsl_ir_jump` changes meaning depending on the SM. […]
At the time I wrote that comment (v3), the patch was emitting a conditional `HLSL_IR_JUMP_DISCARD` for sm1, but still an unconditional `HLSL_IR_JUMP_DISCARD` (guarded by an `HLSL_IR_IF`) for sm4. There are basically two issues here:
* That the IR should have identical semantics regardless of backend, i.e. what Giovanni is arguing. I think this is something we should universally stick to. I want to be able to read code that generates HLSL IR, or dumped HLSL IR, and know exactly what it's doing, without having to rely on context.
* That we should (as much as possible) generate IR in the frontend that doesn't depend on the profile, and use explicit lowering passes to change it where necessary. This is... something I've been trying to argue for on the grounds of modularity, and what I was trying to say in my original comment in this thread, but I can also see the counterargument that I'm cargo-culting modularity, especially when we can't get away from having profile checks elsewhere in the HLSL code.
For the sake of explanation, D3DCOLORtoUBYTE4() is a bit of a different case from this one: there the *semantics* of the function actually differ between sm1 and sm4. In the case of clip() the semantics are the same, but different bytecode is required to express them. That doesn't mean I'm 100% convinced we want to use a lowering pass, along the lines of what I was saying above.
In terms of analogizing to other code, this is more similar to (float) division, sqrt(), dot(), or round(), which all have dedicated instructions for sm4 and need lowering for sm1. Currently we use dedicated passes to perform that lowering. Is that the best solution? Maybe. Probably?
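For illustration, a discard lowering pass in that style might look roughly like the sketch below. It is only a sketch: `build_any_negative()` is an assumed helper standing in for the "reduce the argument to a scalar bool" chain that intrinsic_clip() currently builds in hlsl.y, which, as Nikolay notes above, isn't shared outside hlsl.y today.

```c
/* Hypothetical sm4+ pass: rewrite "discard if any component of the argument
 * is negative" into the scalar non-zero condition that discard_nz expects.
 * build_any_negative() is an assumed helper that would insert an
 * "any(arg < 0)" expression chain before the jump and return the resulting
 * scalar bool node. */
static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
    struct hlsl_ir_node *condition;
    struct hlsl_ir_jump *jump;

    if (instr->type != HLSL_IR_JUMP)
        return false;
    jump = hlsl_ir_jump(instr);
    if (jump->type != HLSL_IR_JUMP_DISCARD || !jump->arg.node)
        return false;

    if (!(condition = build_any_negative(ctx, instr, jump->arg.node)))
        return false;

    /* Repoint the jump at the new scalar condition. */
    hlsl_src_remove(&jump->arg);
    hlsl_src_from_node(&jump->arg, condition);
    return true;
}
```

intrinsic_clip() would then always emit the profile-neutral form, and a pass like this would only run for sm4+.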
> On top of that, there is probably a decision that should at least be made more explicit here: when I proposed adding an IR operator corresponding to the C ternary operator, it was rejected because, if I understood correctly, we don't want to make the IR too rich, so that it's easier to write the optimizer. […]
>
> Now, I was against this decision at the time, so I should be in favor of what Nikolay is doing here. And in a sense I am, except that I feel we should be a bit more consistent about these design decisions. Is this case different from the ternary operator? […]
Honestly, looking at that discussion, I'm not really convinced by my arguments there either, or at least I don't think they're very strong. I think keeping the IR as conceptually simple as possible is a good idea, but I'm not sure that adding a MOVC analogue really adds much in the way of conceptual complexity. It's not like conditional discard adds that much either. On the other hand, these things do add up...
One thing that we probably do want to avoid (which I think I alluded to in an earlier comment) is having to write both a lowering pass and a raising pass for the same transformation, i.e. doing the same transformation in both directions. E.g. we don't want to emit MOVC for ternaries and have to lower it for SM1, but then also have to raise "if (cond) a = b; else a = c;" to a MOVC. Similarly, it's a waste of time to emit a clip-discard statement and have to lower it for sm4, but then also have to raise an explicitly spelled-out clip() for sm1. On the other hand, native doesn't seem to raise *either* of these cases...