Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_sm4.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index e597425a..c56a74d4 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1446,11 +1446,14 @@ static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf
write_sm4_block(ctx, buffer, &iff->then_instrs);
- instr.opcode = VKD3D_SM4_OP_ELSE; - instr.src_count = 0; - write_sm4_instruction(buffer, &instr); + if (!list_empty(&iff->else_instrs.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; + write_sm4_instruction(buffer, &instr);
- write_sm4_block(ctx, buffer, &iff->else_instrs); + write_sm4_block(ctx, buffer, &iff->else_instrs); + }
instr.opcode = VKD3D_SM4_OP_ENDIF; write_sm4_instruction(buffer, &instr);
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl_sm4.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index c56a74d4..18c80774 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1166,7 +1166,7 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, { const unsigned int dimx = constant->node.data_type->dimx; struct sm4_instruction instr; - unsigned int i; + unsigned int i, j = 0;
memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV; @@ -1177,7 +1177,11 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, instr.srcs[0].reg.dim = (dimx > 1) ? VKD3D_SM4_DIMENSION_VEC4 : VKD3D_SM4_DIMENSION_SCALAR; instr.srcs[0].reg.type = VKD3D_SM4_RT_IMMCONST; for (i = 0; i < dimx; ++i) - instr.srcs[0].reg.immconst_uint[i] = constant->value[i].u; + { + while (dimx != 1 && !(instr.dsts[0].writemask & (1u << j))) + j++; + instr.srcs[0].reg.immconst_uint[j++] = constant->value[i].u; + } instr.src_count = 1,
write_sm4_instruction(buffer, &instr);
On 11/12/21 09:36, Giovanni Mascellani wrote:
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com
libs/vkd3d-shader/hlsl_sm4.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index c56a74d4..18c80774 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1166,7 +1166,7 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, { const unsigned int dimx = constant->node.data_type->dimx; struct sm4_instruction instr;
- unsigned int i;
unsigned int i, j = 0;
memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV;
@@ -1177,7 +1177,11 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, instr.srcs[0].reg.dim = (dimx > 1) ? VKD3D_SM4_DIMENSION_VEC4 : VKD3D_SM4_DIMENSION_SCALAR; instr.srcs[0].reg.type = VKD3D_SM4_RT_IMMCONST; for (i = 0; i < dimx; ++i)
instr.srcs[0].reg.immconst_uint[i] = constant->value[i].u;
{
while (dimx != 1 && !(instr.dsts[0].writemask & (1u << j)))
j++;
instr.srcs[0].reg.immconst_uint[j++] = constant->value[i].u;
} instr.src_count = 1,
write_sm4_instruction(buffer, &instr);
This is probably quibbling, but how about this?
j = 0; for (i = 0; i < 4; ++i) { if (instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
Feels a bit simpler to me.
Hi,
On 15/11/21 18:48, Zebediah Figura (she/her) wrote:
This is probably quibbling, but how about this?
j = 0; for (i = 0; i < 4; ++i) { if (instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
Feels a bit simpler to me.
I agree it's simpler, but as written it's also wrong. :-)
The problem is that when dimx == 1, we set the register to have dimension VKD3D_SM4_DIMENSION_SCALAR, which means that the constant must be written in the first register, no matter the writemask.
I think that trying to accommodate for that thing lead me to uselessly convoluted code, because your proposal can be fixed to this:
j = 0; for (i = 0; i < 4; ++i) { if (dimx == 1 || instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
This produces three useless writes for scalar registers, but hopefully that's not a big problem.
Thanks for the review, Giovanni.
On 11/16/21 3:22 AM, Giovanni Mascellani wrote:
Hi,
On 15/11/21 18:48, Zebediah Figura (she/her) wrote:
This is probably quibbling, but how about this?
j = 0; for (i = 0; i < 4; ++i) { if (instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
Feels a bit simpler to me.
I agree it's simpler, but as written it's also wrong. :-)
The problem is that when dimx == 1, we set the register to have dimension VKD3D_SM4_DIMENSION_SCALAR, which means that the constant must be written in the first register, no matter the writemask.
I think that trying to accommodate for that thing lead me to uselessly convoluted code, because your proposal can be fixed to this:
j = 0; for (i = 0; i < 4; ++i) { if (dimx == 1 || instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
This produces three useless writes for scalar registers, but hopefully that's not a big problem.
Thanks for the review, Giovanni.
Personally I'd just do
struct sm4_register *reg = &instr.srcs[0].reg;
if (dimx == 1) { reg->dim = VKD3D_SM4_DIMENSION_SCALAR; reg->immconst_uint[0] = constant->value[0].u; } else { unsigned int i, j = 0;
reg->dim = VKD3D_SM4_DIMENSION_VEC4; for (i = 0; i < 4; ++i) { if (instr.dsts[0].writemask & (1u << i)) reg->immconst_uint[i] = constant->value[j++].u; } }
It's more LOC, yeah, but I think it's clearer.
On Tue, Nov 16, 2021 at 10:23 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 15/11/21 18:48, Zebediah Figura (she/her) wrote:
This is probably quibbling, but how about this?
j = 0; for (i = 0; i < 4; ++i) { if (instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
Feels a bit simpler to me.
I agree it's simpler, but as written it's also wrong. :-)
The problem is that when dimx == 1, we set the register to have dimension VKD3D_SM4_DIMENSION_SCALAR, which means that the constant must be written in the first register, no matter the writemask.
That's probably right but it raises all kinds of red flags for me. Why is dimx == 1 such a special case (aside from the SCALAR / VEC4 dimension)? We're basically saying that we ignore the writemask for scalar constants. Why?
The code being convoluted, or mostly duplicated, is a consequence of the above. It feels like we should fix that instead.
Hi,
On 17/11/21 10:52, Matteo Bruni wrote:
That's probably right but it raises all kinds of red flags for me. Why is dimx == 1 such a special case (aside from the SCALAR / VEC4 dimension)? We're basically saying that we ignore the writemask for scalar constants. Why?
The code being convoluted, or mostly duplicated, is a consequence of the above. It feels like we should fix that instead.
My understanding is that the special case here is not made by us, but by Microsoft: when an immediate is SCALAR instead of VEC4, then it is implicitly swizzled to all the components, but that means that it has to be always be stored in component 0 (the only one that exists), not in the component selected by the writemask.
Note that we're not ignoring the writemask at writing time; here we're just formatting the immediate in the right way, and when there is only one component, of course, there is no useful information in the writemask. You cannot choose between different components if there is only one. When the write will be executed, it will still be with the right writemask.
Giovanni.
On Wed, Nov 17, 2021 at 11:04 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 17/11/21 10:52, Matteo Bruni wrote:
That's probably right but it raises all kinds of red flags for me. Why is dimx == 1 such a special case (aside from the SCALAR / VEC4 dimension)? We're basically saying that we ignore the writemask for scalar constants. Why?
The code being convoluted, or mostly duplicated, is a consequence of the above. It feels like we should fix that instead.
My understanding is that the special case here is not made by us, but by Microsoft: when an immediate is SCALAR instead of VEC4, then it is implicitly swizzled to all the components, but that means that it has to be always be stored in component 0 (the only one that exists), not in the component selected by the writemask.
An alternative way of seeing it is asking whether instr.dsts[0].writemask is supposed to contain the writemask exactly as it should be output by the compiler or if it should be "our" writemask (which for scalar constants it could usefully have value ".x"). It's okay to pick the former option and accept the complication here but at least we should be aware of the choice we're making.
On Wed, Nov 17, 2021 at 11:17 AM Matteo Bruni matteo.mystral@gmail.com wrote:
On Wed, Nov 17, 2021 at 11:04 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 17/11/21 10:52, Matteo Bruni wrote:
That's probably right but it raises all kinds of red flags for me. Why is dimx == 1 such a special case (aside from the SCALAR / VEC4 dimension)? We're basically saying that we ignore the writemask for scalar constants. Why?
The code being convoluted, or mostly duplicated, is a consequence of the above. It feels like we should fix that instead.
My understanding is that the special case here is not made by us, but by Microsoft: when an immediate is SCALAR instead of VEC4, then it is implicitly swizzled to all the components, but that means that it has to be always be stored in component 0 (the only one that exists), not in the component selected by the writemask.
An alternative way of seeing it is asking whether instr.dsts[0].writemask is supposed to contain the writemask exactly as it should be output by the compiler or if it should be "our" writemask (which for scalar constants it could usefully have value ".x"). It's okay to pick the former option and accept the complication here but at least we should be aware of the choice we're making.
In the latter case we need to fix up the writemask when generating the bytecode, probably in write_sm4_instruction(). As it turns out we're already doing that, since we only use it for VKD3D_SM4_DIMENSION_VEC4 destinations.
Hi,
On 17/11/21 11:22, Matteo Bruni wrote:
An alternative way of seeing it is asking whether instr.dsts[0].writemask is supposed to contain the writemask exactly as it should be output by the compiler or if it should be "our" writemask (which for scalar constants it could usefully have value ".x"). It's okay to pick the former option and accept the complication here but at least we should be aware of the choice we're making.
In the latter case we need to fix up the writemask when generating the bytecode, probably in write_sm4_instruction(). As it turns out we're already doing that, since we only use it for VKD3D_SM4_DIMENSION_VEC4 destinations.
Yeah, I guess we could see it in a different way, but it seems to me that currently the bulk of hlsl_sm4.c is organized around considering write_sm4_instruction essentially a serialization helper (all the callers of write_sm4_instruction "convert" their writemasks before calling it). We can discuss whether we want it this way or another one, but currently it is like that, and the target of this patch was not to change it. It was just to fix a bug.
Giovanni.
On 11/17/21 03:52, Matteo Bruni wrote:
On Tue, Nov 16, 2021 at 10:23 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 15/11/21 18:48, Zebediah Figura (she/her) wrote:
This is probably quibbling, but how about this?
j = 0; for (i = 0; i < 4; ++i) { if (instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
Feels a bit simpler to me.
I agree it's simpler, but as written it's also wrong. :-)
The problem is that when dimx == 1, we set the register to have dimension VKD3D_SM4_DIMENSION_SCALAR, which means that the constant must be written in the first register, no matter the writemask.
That's probably right but it raises all kinds of red flags for me. Why is dimx == 1 such a special case (aside from the SCALAR / VEC4 dimension)? We're basically saying that we ignore the writemask for scalar constants. Why?
The code being convoluted, or mostly duplicated, is a consequence of the above. It feels like we should fix that instead.
dimx == 1 is a special case because it's a special case in DXBC. We're not ignoring the writemask; it's just that for scalars the writemask is only reflected in the destination register, whereas for vectors it's reflected in the source registers as well.
As Giovanni puts it, write_sm4_instruction() is mostly just a serialization helper. hlsl_ir_node structures categorically contain "unmapped" data—so the "writemask" field of hlsl_reg is a misnomer; on the other hand, not all source registers are "mapped" (e.g. ld/sample coords) so we can't categorically do mapping in write_sm4_instruction(). Hence we do that translation in the individual callers, like this one.
We should be doing a similar thing for normal arithmetic register swizzles, but we currently aren't.
On Thu, Nov 18, 2021 at 3:44 AM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 11/17/21 03:52, Matteo Bruni wrote:
On Tue, Nov 16, 2021 at 10:23 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 15/11/21 18:48, Zebediah Figura (she/her) wrote:
This is probably quibbling, but how about this?
j = 0; for (i = 0; i < 4; ++i) { if (instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
Feels a bit simpler to me.
I agree it's simpler, but as written it's also wrong. :-)
The problem is that when dimx == 1, we set the register to have dimension VKD3D_SM4_DIMENSION_SCALAR, which means that the constant must be written in the first register, no matter the writemask.
That's probably right but it raises all kinds of red flags for me. Why is dimx == 1 such a special case (aside from the SCALAR / VEC4 dimension)? We're basically saying that we ignore the writemask for scalar constants. Why?
The code being convoluted, or mostly duplicated, is a consequence of the above. It feels like we should fix that instead.
dimx == 1 is a special case because it's a special case in DXBC. We're not ignoring the writemask; it's just that for scalars the writemask is only reflected in the destination register, whereas for vectors it's reflected in the source registers as well.
As Giovanni puts it, write_sm4_instruction() is mostly just a serialization helper. hlsl_ir_node structures categorically contain "unmapped" data—so the "writemask" field of hlsl_reg is a misnomer; on the other hand, not all source registers are "mapped" (e.g. ld/sample coords) so we can't categorically do mapping in write_sm4_instruction(). Hence we do that translation in the individual callers, like this one.
We should be doing a similar thing for normal arithmetic register swizzles, but we currently aren't.
What I'm getting at is this kind of half one thing, half the opposite is bad. I think we should move towards eventually making struct sm4_instruction a proper IR and every time there is a chance of making progress in that direction we should take it.
On 11/18/21 11:14, Matteo Bruni wrote:
On Thu, Nov 18, 2021 at 3:44 AM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 11/17/21 03:52, Matteo Bruni wrote:
On Tue, Nov 16, 2021 at 10:23 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 15/11/21 18:48, Zebediah Figura (she/her) wrote:
This is probably quibbling, but how about this?
j = 0; for (i = 0; i < 4; ++i) { if (instr.dsts[0].writemask & (1u << i)) instr.srcs[0].reg.immconst_uint[i] = constant->value[j++].u; }
Feels a bit simpler to me.
I agree it's simpler, but as written it's also wrong. :-)
The problem is that when dimx == 1, we set the register to have dimension VKD3D_SM4_DIMENSION_SCALAR, which means that the constant must be written in the first register, no matter the writemask.
That's probably right but it raises all kinds of red flags for me. Why is dimx == 1 such a special case (aside from the SCALAR / VEC4 dimension)? We're basically saying that we ignore the writemask for scalar constants. Why?
The code being convoluted, or mostly duplicated, is a consequence of the above. It feels like we should fix that instead.
dimx == 1 is a special case because it's a special case in DXBC. We're not ignoring the writemask; it's just that for scalars the writemask is only reflected in the destination register, whereas for vectors it's reflected in the source registers as well.
As Giovanni puts it, write_sm4_instruction() is mostly just a serialization helper. hlsl_ir_node structures categorically contain "unmapped" data—so the "writemask" field of hlsl_reg is a misnomer; on the other hand, not all source registers are "mapped" (e.g. ld/sample coords) so we can't categorically do mapping in write_sm4_instruction(). Hence we do that translation in the individual callers, like this one.
We should be doing a similar thing for normal arithmetic register swizzles, but we currently aren't.
What I'm getting at is this kind of half one thing, half the opposite is bad. I think we should move towards eventually making struct sm4_instruction a proper IR and every time there is a chance of making progress in that direction we should take it.
I don't think I see why this is half of anything. We have to translate between the hlsl_ir representation of swizzles and the DXBC representation of swizzles somewhere, why not here?
I'm not sure if we'd want the SM4 IR to use mapped swizzles or not, but either way, I don't see how this commit goes in the wrong direction.
Hi,
the patch has already been accepted, but since this discussion also concerns more long term work...
On 18/11/21 18:19, Zebediah Figura (she/her) wrote:
What I'm getting at is this kind of half one thing, half the opposite is bad. I think we should move towards eventually making struct sm4_instruction a proper IR and every time there is a chance of making progress in that direction we should take it.
I don't think I see why this is half of anything. We have to translate between the hlsl_ir representation of swizzles and the DXBC representation of swizzles somewhere, why not here?
Agreed, neither I can see what would be the two half things. As I see it, write_sm4_instruction is basically a serialization function, which is useful as it is and shouldn't do anything else than serialization, and struct sm4_instruction is just a way to provide structured input to write_sm4_instruction. I find it logical that all processing is done before data is fed to write_sm4_instruction, so processing and serialization are well separated.
I think it might make sense to have another optimization framework which is lower level than IR, but this should happen before write_sm4_instruction and be essentially independent of it. It might reuse struct sm4_instruction, possibly with some changes to make it easier to manipulate (like, for example, store it in a linked list).
If this were to happen, my guess is that it would make sense to have this pass at an actual lower level than our current IR, therefore drop out knowledge of the IR variables and temporaries, implying that we store the "actual" writemasks and swizzles, not those that make sense given the IR variables and temporaries. But this can only be evaluated once some actual optimization pass is there.
Just my two cents!
Giovanni.
On Mon, Nov 22, 2021 at 11:41 AM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
the patch has already been accepted, but since this discussion also concerns more long term work...
On 18/11/21 18:19, Zebediah Figura (she/her) wrote:
What I'm getting at is this kind of half one thing, half the opposite is bad. I think we should move towards eventually making struct sm4_instruction a proper IR and every time there is a chance of making progress in that direction we should take it.
I don't think I see why this is half of anything. We have to translate between the hlsl_ir representation of swizzles and the DXBC representation of swizzles somewhere, why not here?
Agreed, neither I can see what would be the two half things. As I see it, write_sm4_instruction is basically a serialization function, which is useful as it is and shouldn't do anything else than serialization, and struct sm4_instruction is just a way to provide structured input to write_sm4_instruction. I find it logical that all processing is done before data is fed to write_sm4_instruction, so processing and serialization are well separated.
I think it might make sense to have another optimization framework which is lower level than IR, but this should happen before write_sm4_instruction and be essentially independent of it. It might reuse struct sm4_instruction, possibly with some changes to make it easier to manipulate (like, for example, store it in a linked list).
If this were to happen, my guess is that it would make sense to have this pass at an actual lower level than our current IR, therefore drop out knowledge of the IR variables and temporaries, implying that we store the "actual" writemasks and swizzles, not those that make sense given the IR variables and temporaries. But this can only be evaluated once some actual optimization pass is there.
Just my two cents!
Giovanni.
Yeah, I still disagree at a fundamental level. I don't think further arguing is useful; I'll try to write some patches at some point.
Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com --- libs/vkd3d-shader/hlsl.y | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 2120b26f..b7e0409d 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2529,6 +2529,10 @@ type:
$$ = hlsl_get_vector_type(ctx, $3->base_type, $5); } + | KW_VECTOR + { + $$ = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + } | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' { if ($3->type != HLSL_CLASS_SCALAR) @@ -2557,6 +2561,10 @@ type:
$$ = hlsl_get_matrix_type(ctx, $3->base_type, $7, $5); } + | KW_MATRIX + { + $$ = hlsl_get_matrix_type(ctx, HLSL_TYPE_FLOAT, 4, 4); + } | KW_VOID { $$ = ctx->builtin_types.Void;
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
---
Adding a quick test for this would be nice, though.