-- v8: vkd3d-shader/dxil: Support 16-bit types. vkd3d-shader/spirv: Introduce HALF and UINT16 types for minimum precision.
From: Conor McCarthy cmccarthy@codeweavers.com
Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or should not be sign-extended when 16-bit execution is not supported. --- tests/hlsl/minimum-precision.shader_test | 41 ++++++++++++++++++++++++ 1 file changed, 41 insertions(+)
diff --git a/tests/hlsl/minimum-precision.shader_test b/tests/hlsl/minimum-precision.shader_test index e5053e3d1..a93d235ca 100644 --- a/tests/hlsl/minimum-precision.shader_test +++ b/tests/hlsl/minimum-precision.shader_test @@ -19,3 +19,44 @@ float4 main() : sv_target [test] draw quad probe all rgba (197.0, 218.0, 238.0, 257.0) + + +[pixel shader] +uniform min16uint2 u; + +float4 main() : sv_target +{ + min16uint i = 0x7fff, j = 0xffff; + return float4(u.x + i, u.y + j, 0, 0); +} + +[test] +uniform 0 uint4 0 0 0 0 +draw quad +probe all rgba (32767.0, 65535.0, 0.0, 0.0) + + +% The code d3dcompiler_47 produces for this appears correct, but the result +% is still zero in Windows. +[require] +shader model >= 6.0 + +[pixel shader] +uniform min16uint4 u; +uniform uint i; + +float4 main() : sv_target +{ + min16uint arr[4] = {1, 2, 0x7fff, 0xffff}; + return float4(u.x + arr[i], u.y + arr[i + 1], 0, 0); +} + +[test] +uniform 0 uint4 0 0 0 0 +uniform 4 uint 2 +draw quad +probe all rgba (32767.0, 65535.0, 0.0, 0.0) +uniform 0 uint4 0 0 0 0 +uniform 4 uint 0 +draw quad +probe all rgba (1.0, 2.0, 0.0, 0.0)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 45f46a91c..614f4e8e1 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -223,6 +223,11 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d } }
+static bool data_type_is_floating_point(enum vkd3d_data_type data_type) +{ + return data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; +} + static inline bool register_is_undef(const struct vkd3d_shader_register *reg) { return reg->type == VKD3DSPR_UNDEF; @@ -3984,7 +3989,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, uint32_t type_id;
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id);
FIXME("Unhandled data type %#x.\n", reg->data_type); @@ -3998,7 +4003,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, uint32_t type_id;
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); else if (reg->data_type == VKD3D_DATA_INT || reg->data_type == VKD3D_DATA_UINT) return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); @@ -4179,7 +4184,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, }
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id);
FIXME("Unhandled data type %#x.\n", reg->data_type);
From: Conor McCarthy cmccarthy@codeweavers.com
Minimum precision types must always be implemented as 32-bit to match how reduced precision works in SPIR-V. --- libs/vkd3d-shader/d3d_asm.c | 2 + libs/vkd3d-shader/spirv.c | 90 ++++++++++++++++++++++-- libs/vkd3d-shader/vkd3d_shader_private.h | 8 ++- 3 files changed, 94 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d-shader/d3d_asm.c index ac1c41f96..b603732b0 100644 --- a/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d-shader/d3d_asm.c @@ -627,6 +627,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum [VKD3D_DATA_UINT8 ] = "uint8", [VKD3D_DATA_UINT64 ] = "uint64", [VKD3D_DATA_BOOL ] = "bool", + [VKD3D_DATA_UINT16 ] = "uint16", + [VKD3D_DATA_HALF ] = "half", };
const char *name; diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 614f4e8e1..8fd72c2c4 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -225,7 +225,7 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d
static bool data_type_is_floating_point(enum vkd3d_data_type data_type) { - return data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; + return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; }
static inline bool register_is_undef(const struct vkd3d_shader_register *reg) @@ -1824,6 +1824,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_SNORM: case VKD3D_DATA_UNORM: @@ -1831,6 +1832,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder break; case VKD3D_DATA_INT: case VKD3D_DATA_UINT: + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); break; case VKD3D_DATA_DOUBLE: @@ -3679,6 +3681,78 @@ static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compil type_id, vector1_id, vector2_id, components, component_count); }
+/* Based on the implementation in the OpenGL Mathematics library. */ +static uint32_t half_to_float(uint16_t value) +{ + uint32_t s = (value & 0x8000) << 16; + uint32_t e = (value >> 10) & 0x1fu; + uint32_t m = value & 0x3ffu; + + if (!e) + { + if (!m) + { + /* Plus or minus zero */ + return s; + } + else + { + /* Denormalized number -- renormalize it */ + + while (!(m & 0x400u)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x400u; + } + } + else if (e == 31u) + { + if (!m) + { + /* Positive or negative infinity */ + return s | 0x7f800000u; + } + else + { + /* Nan -- preserve sign and significand bits */ + return s | 0x7f800000u | (m << 13); + } + } + + /* Normalized number */ + e += 127u - 15u; + m <<= 13; + + /* Assemble s, e and m. */ + return s | (e << 23) | m; +} + +static uint32_t convert_raw_constant32(struct spirv_compiler *compiler, + enum vkd3d_data_type data_type, unsigned int uint_value) +{ + int16_t i; + + /* TODO: native 16-bit support. */ + if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) + return uint_value; + + if (data_type == VKD3D_DATA_HALF) + return half_to_float(uint_value); + + /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or + * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows + * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These + * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not + * extended, and results match SM 5. It seems best to replicate the sign-extension, and if + * execution is 16-bit, the values will be truncated. 
*/ + i = uint_value; + return (int32_t)i; +} + static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) { @@ -3691,14 +3765,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile if (reg->dimension == VSIR_DIMENSION_SCALAR) { for (i = 0; i < component_count; ++i) - values[i] = *reg->u.immconst_uint; + values[i] = convert_raw_constant32(compiler, reg->data_type, reg->u.immconst_uint[0]); } else { for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) - values[j++] = reg->u.immconst_uint[vsir_swizzle_get_component(swizzle, i)]; + values[j++] = convert_raw_constant32(compiler, reg->data_type, + reg->u.immconst_uint[vsir_swizzle_get_component(swizzle, i)]); } }
@@ -3829,6 +3904,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil
switch (icb->data_type) { + case VKD3D_DATA_HALF: + case VKD3D_DATA_UINT16: + /* Scalar only. */ + for (i = 0; i < element_count; ++i) + elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, + convert_raw_constant32(compiler, icb->data_type, icb->data[i])); + break; case VKD3D_DATA_FLOAT: case VKD3D_DATA_INT: case VKD3D_DATA_UINT: @@ -6719,7 +6801,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL);
val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_FLOAT) + if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) { val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index f1cc9ad7b..b1a93cd6d 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -605,14 +605,16 @@ enum vkd3d_data_type VKD3D_DATA_UINT8, VKD3D_DATA_UINT64, VKD3D_DATA_BOOL, + VKD3D_DATA_UINT16, + VKD3D_DATA_HALF,
VKD3D_DATA_COUNT, };
static inline bool data_type_is_integer(enum vkd3d_data_type data_type) { - return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT - || data_type == VKD3D_DATA_UINT64; + return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 + || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; }
static inline bool data_type_is_bool(enum vkd3d_data_type data_type) @@ -1445,10 +1447,12 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_ty { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_UNORM: case VKD3D_DATA_SNORM: return VKD3D_SHADER_COMPONENT_FLOAT; + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_UINT: return VKD3D_SHADER_COMPONENT_UINT; case VKD3D_DATA_INT:
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 11 +++++++++-- tests/hlsl/minimum-precision.shader_test | 16 +++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 602056e25..7d13fe0c4 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -2008,6 +2008,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type return VKD3D_DATA_BOOL; case 8: return VKD3D_DATA_UINT8; + case 16: + return VKD3D_DATA_UINT16; case 32: return VKD3D_DATA_UINT; case 64: @@ -2021,6 +2023,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type { switch (type->u.width) { + case 16: + return VKD3D_DATA_HALF; case 32: return VKD3D_DATA_FLOAT; case 64: @@ -2594,7 +2598,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const }
if (type->u.width == 16) - FIXME("Half float type is not supported yet.\n"); + dst->u.reg.u.immconst_uint[0] = record->operands[0]; else if (type->u.width == 32) dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); else if (type->u.width == 64) @@ -3826,7 +3830,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ break; case CAST_ZEXT: case CAST_SEXT: - /* nop or min precision. TODO: native 16-bit */ + /* nop or min precision. TODO: native 16-bit. + * Extension instructions could be emitted for min precision, but in Windows + * the AMD RX 580 simply drops such instructions, which makes sense as no + * assumptions should be made about any behaviour which depends on bit width. */ if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) { op = VKD3DSIH_NOP; diff --git a/tests/hlsl/minimum-precision.shader_test b/tests/hlsl/minimum-precision.shader_test index a93d235ca..bc56720b2 100644 --- a/tests/hlsl/minimum-precision.shader_test +++ b/tests/hlsl/minimum-precision.shader_test @@ -30,16 +30,26 @@ float4 main() : sv_target return float4(u.x + i, u.y + j, 0, 0); }
+[require] +shader model >= 4.0 +shader model < 6.0 + [test] uniform 0 uint4 0 0 0 0 draw quad probe all rgba (32767.0, 65535.0, 0.0, 0.0)
+[require] +shader model >= 6.0 + +[test] +uniform 0 uint4 0 0 0 0 +draw quad +probe all rgba (32767.0, 4.29496730e09, 0.0, 0.0) 1 +
% The code d3dcompiler_47 produces for this appears correct, but the result % is still zero in Windows. -[require] -shader model >= 6.0
[pixel shader] uniform min16uint4 u; @@ -55,7 +65,7 @@ float4 main() : sv_target uniform 0 uint4 0 0 0 0 uniform 4 uint 2 draw quad -probe all rgba (32767.0, 65535.0, 0.0, 0.0) +probe all rgba (32767.0, 4.29496730e09, 0.0, 0.0) 1 uniform 0 uint4 0 0 0 0 uniform 4 uint 0 draw quad
I made this change so that the runner crosstests can be run from a Visual Studio command prompt, where dxcompiler.dll is on the PATH. `SONAME_LIBDXCOMPILER` is already defined even for crosstests, and there's no way to tell at compile time what its value should actually be for crosstest builds.
Right, I overlooked that the issue here is that we may want to use, for example, "libdxcompiler.so" for the regular build and "dxcompiler.dll" for the cross build. It turns out that, in principle, the Vulkan runner (for example) would have that issue as well, but conveniently the Vulkan runner can't work for cross builds.
In any case, something along the lines of Giovanni's proposal seems reasonable to me.