 
            From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 8a4704e56..a5f29ed48 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -6907,8 +6907,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp const struct vkd3d_shader_src_param *src = instruction->src; uint32_t src_id[SPIRV_MAX_SRC_COUNT]; uint32_t instr_set_id, type_id, val_id; + unsigned int i, component_count; enum GLSLstd450 glsl_inst; - unsigned int i;
glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); if (glsl_inst == GLSLstd450Bad) @@ -6934,8 +6934,9 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) { /* In D3D bits are numbered from the most significant bit. */ + component_count = vsir_write_mask_component_count(dst->write_mask); val_id = vkd3d_spirv_build_op_isub(builder, type_id, - spirv_compiler_get_constant_uint(compiler, 31), val_id); + spirv_compiler_get_constant_uint_vector(compiler, 31, component_count), val_id); }
spirv_compiler_emit_store_dst(compiler, dst, val_id);
 
            From: Conor McCarthy cmccarthy@codeweavers.com
--- tests/d3d12.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 427e59c22..0edad708e 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -9157,6 +9157,28 @@ static void test_shader_instructions(void) 0x00000000, 0x0020800a, 0x00000000, 0x00000000, 0x0100003e, }; static struct named_shader ps_bits = {"bits", ps_bits_code, sizeof(ps_bits_code)}; + static const DWORD ps_bits_vector_code[] = + { +#if 0 + uint2 u; + + uint4 main() : SV_Target + { + return uint4(countbits(u), firstbithigh(u)); + } +#endif + 0x43425844, 0x799764f6, 0x7e5d98cf, 0x4dd694ef, 0xa30ee79a, 0x00000001, 0x0000013c, 0x00000003, + 0x0000002c, 0x0000003c, 0x00000070, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000001, 0x00000000, + 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x000000c4, 0x00000050, 0x00000031, + 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x03000065, 0x001020f2, 0x00000000, + 0x02000068, 0x00000001, 0x06000087, 0x00100032, 0x00000000, 0x00208046, 0x00000000, 0x00000000, + 0x0b00001e, 0x00100032, 0x00000000, 0x80100046, 0x00000041, 0x00000000, 0x00004002, 0x0000001f, + 0x0000001f, 0x00000000, 0x00000000, 0x0d000037, 0x001020c2, 0x00000000, 0x00208406, 0x00000000, + 0x00000000, 0x00100406, 0x00000000, 0x00004002, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, + 0x06000086, 0x00102032, 0x00000000, 0x00208046, 0x00000000, 0x00000000, 0x0100003e, + }; + static struct named_shader ps_bits_vector = {"bits_vector", ps_bits_vector_code, sizeof(ps_bits_vector_code)}; static const DWORD ps_ishr_code[] = { #if 0 @@ -10529,6 +10551,8 @@ static void test_shader_instructions(void) {&ps_bits, {{{0x8000000f, 0x8000000f}}}, {{ 5, 0, 31, 30}}}, {&ps_bits, {{{0x00080000, 0x00080000}}}, {{ 1, 19, 19, 19}}},
+ {&ps_bits_vector, {{{0x11111111, 0x00080000}}}, {{8, 1, 28, 19}}}, + {&ps_ishr, {{{0x00000000, 0x00000000, 0x00000000, 0x00000000}, {~0x1fu, 0, 32, 64}}}, {{0x00000000, 0x00000000, 0x00000000, 0x00000000}}}, {&ps_ishr, {{{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}, {~0x1fu, 0, 32, 64}}},
 
            Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/spirv.c:
|| instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) { /* In D3D bits are numbered from the most significant bit. */
component_count = vsir_write_mask_component_count(dst->write_mask); val_id = vkd3d_spirv_build_op_isub(builder, type_id,
spirv_compiler_get_constant_uint(compiler, 31), val_id);
spirv_compiler_get_constant_uint_vector(compiler, 31, component_count), val_id);
I guess this is wrong when `~0u` is returned (because the input is 0 or, for the `shi` variant, -1).
It seems that the tests don't trip on this because the HLSL compiler already covers for this. The emitted code is something like: ``` vkd3d:3227409:trace:vkd3d_shader_trace firstbit_shi r0.x v4:uint, cb0[0].y v4:int vkd3d:3227409:trace:vkd3d_shader_trace ieq r0.y v4:uint, r0.x v4:int, l(-1) <s:int> vkd3d:3227409:trace:vkd3d_shader_trace iadd r0.x v4:int, l(31) <s:int>, -r0.x v4:int vkd3d:3227409:trace:vkd3d_shader_trace movc o0.w v4:float, r0.y v4:uint, l(-nan) <s:float>, r0.x v4:float ```
So even if we emit `31 - ~0u` the code immediately makes up for that and converts that back to `~0u`. However this is still wrong, because we're supposed to translate the TPF code correctly even if the HLSL compiler isn't covering up for us.
I realize that's orthogonal to what this MR is doing, but if you could fix that while you're at it it would be great. I guess the hardest part is getting a TPF shader without the fixup instructions. Maybe we'll have to edit the bytecode by hand for that. It would be nice to have a D3DBC/TPF assembler for instruction tests.
 
            On Wed Jan 10 12:24:56 2024 +0000, Giovanni Mascellani wrote:
I guess this is wrong when `~0u` is returned (because the input is 0 or, for the `shi` variant, -1). It seems that the tests don't trip on this because the HLSL compiler already covers for this. The emitted code is something like:
vkd3d:3227409:trace:vkd3d_shader_trace firstbit_shi r0.x <v4:uint>, cb0[0].y <v4:int> vkd3d:3227409:trace:vkd3d_shader_trace ieq r0.y <v4:uint>, r0.x <v4:int>, l(-1) <s:int> vkd3d:3227409:trace:vkd3d_shader_trace iadd r0.x <v4:int>, l(31) <s:int>, -r0.x <v4:int> vkd3d:3227409:trace:vkd3d_shader_trace movc o0.w <v4:float>, r0.y <v4:uint>, l(-nan) <s:float>, r0.x <v4:float>So even if we emit `31 - ~0u` the code immediately makes up for that and converts that back to `~0u`. However this is still wrong, because we're supposed to translate the TPF code correctly even if the HLSL compiler isn't covering up for us. I realize that's orthogonal to what this MR is doing, but if you could fix that while you're at it it would be great. I guess the hardest part is getting a TPF shader without the fixup instructions. Maybe we'll have to edit the bytecode by hand for that. It would be nice to have a D3DBC/TPF assembler for instruction tests.
For testing I can NOP the fixup instructions away pretty easily in a hex editor. I'm not sure we should be making this change though, unless it fixes something. While it's not great to depend on the presence of fixup instructions (which DXIL also contains btw), if the backend can assume they are always present, then the code is not broken. @hverbeet?
 
            On Mon Jan 15 01:04:49 2024 +0000, Conor McCarthy wrote:
For testing I can NOP the fixup instructions away pretty easily in a hex editor. I'm not sure we should be making this change though, unless it fixes something. While it's not great to depend on the presence of fixup instructions (which DXIL also contains btw), if the backend can assume they are always present, then the code is not broken. @hverbeet?
We don't know whether the fixup code is always present. In principle games are not required to use FXC or DXC to generate binary shaders, so I think our tests should be based on the TPF or DXIL semantics rather than on what compilers output. Even FXC and DXC might skip the fixup in some cases ([though so far I could make that happen](https://shader-playground.timjones.io/79a97c90156287c02164fe34a070a6b5), FXC doesn't seem to be able to propagate the knowledge about `u` being non-zero).
At any rate, as I said, that's a preexisting problem, so this MR can be accepted anyway.
 
            This merge request was approved by Giovanni Mascellani.
 
            For testing I can NOP the fixup instructions away pretty easily in a hex editor. I'm not sure we should be making this change though, unless it fixes something. While it's not great to depend on the presence of fixup instructions (which DXIL also contains btw), if the backend can assume they are always present, then the code is not broken. @hverbeet?
No, as Giovanni said, we can't assume our input is produced by a particular compiler.
 
            This merge request was approved by Henri Verbeet.



