-- v7: vkd3d-shader/dxil: Support 16-bit types. vkd3d-shader/spirv: Introduce HALF and UINT16 types for minimum precision. vkd3d-shader/spirv: Introduce a data_type_is_floating_point() helper function. tests/shader-runner: Add tests for minimum-precision constants.
From: Conor McCarthy <cmccarthy@codeweavers.com>
Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or should not be sign-extended when 16-bit execution is not supported. --- tests/hlsl/minimum-precision.shader_test | 41 ++++++++++++++++++++++++ 1 file changed, 41 insertions(+)
diff --git a/tests/hlsl/minimum-precision.shader_test b/tests/hlsl/minimum-precision.shader_test index e5053e3d1..a93d235ca 100644 --- a/tests/hlsl/minimum-precision.shader_test +++ b/tests/hlsl/minimum-precision.shader_test @@ -19,3 +19,44 @@ float4 main() : sv_target [test] draw quad probe all rgba (197.0, 218.0, 238.0, 257.0) + + +[pixel shader] +uniform min16uint2 u; + +float4 main() : sv_target +{ + min16uint i = 0x7fff, j = 0xffff; + return float4(u.x + i, u.y + j, 0, 0); +} + +[test] +uniform 0 uint4 0 0 0 0 +draw quad +probe all rgba (32767.0, 65535.0, 0.0, 0.0) + + +% The code d3dcompiler_47 produces for this appears correct, but the result +% is still zero in Windows. +[require] +shader model >= 6.0 + +[pixel shader] +uniform min16uint4 u; +uniform uint i; + +float4 main() : sv_target +{ + min16uint arr[4] = {1, 2, 0x7fff, 0xffff}; + return float4(u.x + arr[i], u.y + arr[i + 1], 0, 0); +} + +[test] +uniform 0 uint4 0 0 0 0 +uniform 4 uint 2 +draw quad +probe all rgba (32767.0, 65535.0, 0.0, 0.0) +uniform 0 uint4 0 0 0 0 +uniform 4 uint 0 +draw quad +probe all rgba (1.0, 2.0, 0.0, 0.0)
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d-shader/spirv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 45f46a91c..614f4e8e1 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -223,6 +223,11 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d } }
+static bool data_type_is_floating_point(enum vkd3d_data_type data_type) +{ + return data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; +} + static inline bool register_is_undef(const struct vkd3d_shader_register *reg) { return reg->type == VKD3DSPR_UNDEF; @@ -3984,7 +3989,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, uint32_t type_id;
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id);
FIXME("Unhandled data type %#x.\n", reg->data_type); @@ -3998,7 +4003,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, uint32_t type_id;
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); else if (reg->data_type == VKD3D_DATA_INT || reg->data_type == VKD3D_DATA_UINT) return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); @@ -4179,7 +4184,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, }
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id);
FIXME("Unhandled data type %#x.\n", reg->data_type);
From: Conor McCarthy <cmccarthy@codeweavers.com>
Minimum precision types must always be implemented as 32-bit to match how reduced precision works in SPIR-V. --- libs/vkd3d-shader/spirv.c | 90 ++++++++++++++++++++++-- libs/vkd3d-shader/vkd3d_shader_private.h | 8 ++- 2 files changed, 92 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 614f4e8e1..8fd72c2c4 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -225,7 +225,7 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d
static bool data_type_is_floating_point(enum vkd3d_data_type data_type) { - return data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; + return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; }
static inline bool register_is_undef(const struct vkd3d_shader_register *reg) @@ -1824,6 +1824,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_SNORM: case VKD3D_DATA_UNORM: @@ -1831,6 +1832,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder break; case VKD3D_DATA_INT: case VKD3D_DATA_UINT: + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); break; case VKD3D_DATA_DOUBLE: @@ -3679,6 +3681,78 @@ static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compil type_id, vector1_id, vector2_id, components, component_count); }
+/* Based on the implementation in the OpenGL Mathematics library. */ +static uint32_t half_to_float(uint16_t value) +{ + uint32_t s = (value & 0x8000) << 16; + uint32_t e = (value >> 10) & 0x1fu; + uint32_t m = value & 0x3ffu; + + if (!e) + { + if (!m) + { + /* Plus or minus zero */ + return s; + } + else + { + /* Denormalized number -- renormalize it */ + + while (!(m & 0x400u)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x400u; + } + } + else if (e == 31u) + { + if (!m) + { + /* Positive or negative infinity */ + return s | 0x7f800000u; + } + else + { + /* Nan -- preserve sign and significand bits */ + return s | 0x7f800000u | (m << 13); + } + } + + /* Normalized number */ + e += 127u - 15u; + m <<= 13; + + /* Assemble s, e and m. */ + return s | (e << 23) | m; +} + +static uint32_t convert_raw_constant32(struct spirv_compiler *compiler, + enum vkd3d_data_type data_type, unsigned int uint_value) +{ + int16_t i; + + /* TODO: native 16-bit support. */ + if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) + return uint_value; + + if (data_type == VKD3D_DATA_HALF) + return half_to_float(uint_value); + + /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or + * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows + * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These + * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not + * extended, and results match SM 5. It seems best to replicate the sign-extension, and if + * execution is 16-bit, the values will be truncated. 
*/ + i = uint_value; + return (int32_t)i; +} + static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) { @@ -3691,14 +3765,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile if (reg->dimension == VSIR_DIMENSION_SCALAR) { for (i = 0; i < component_count; ++i) - values[i] = *reg->u.immconst_uint; + values[i] = convert_raw_constant32(compiler, reg->data_type, reg->u.immconst_uint[0]); } else { for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) - values[j++] = reg->u.immconst_uint[vsir_swizzle_get_component(swizzle, i)]; + values[j++] = convert_raw_constant32(compiler, reg->data_type, + reg->u.immconst_uint[vsir_swizzle_get_component(swizzle, i)]); } }
@@ -3829,6 +3904,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil
switch (icb->data_type) { + case VKD3D_DATA_HALF: + case VKD3D_DATA_UINT16: + /* Scalar only. */ + for (i = 0; i < element_count; ++i) + elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, + convert_raw_constant32(compiler, icb->data_type, icb->data[i])); + break; case VKD3D_DATA_FLOAT: case VKD3D_DATA_INT: case VKD3D_DATA_UINT: @@ -6719,7 +6801,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL);
val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_FLOAT) + if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) { val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index f1cc9ad7b..161354e3d 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -604,6 +604,8 @@ enum vkd3d_data_type VKD3D_DATA_UNUSED, VKD3D_DATA_UINT8, VKD3D_DATA_UINT64, + VKD3D_DATA_UINT16, + VKD3D_DATA_HALF, VKD3D_DATA_BOOL,
VKD3D_DATA_COUNT, @@ -611,8 +613,8 @@ enum vkd3d_data_type
static inline bool data_type_is_integer(enum vkd3d_data_type data_type) { - return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT - || data_type == VKD3D_DATA_UINT64; + return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 + || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; }
static inline bool data_type_is_bool(enum vkd3d_data_type data_type) @@ -1445,10 +1447,12 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_ty { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_UNORM: case VKD3D_DATA_SNORM: return VKD3D_SHADER_COMPONENT_FLOAT; + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_UINT: return VKD3D_SHADER_COMPONENT_UINT; case VKD3D_DATA_INT:
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d-shader/dxil.c | 11 +++++++++-- tests/hlsl/minimum-precision.shader_test | 16 +++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 602056e25..7d13fe0c4 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -2008,6 +2008,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type return VKD3D_DATA_BOOL; case 8: return VKD3D_DATA_UINT8; + case 16: + return VKD3D_DATA_UINT16; case 32: return VKD3D_DATA_UINT; case 64: @@ -2021,6 +2023,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type { switch (type->u.width) { + case 16: + return VKD3D_DATA_HALF; case 32: return VKD3D_DATA_FLOAT; case 64: @@ -2594,7 +2598,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const }
if (type->u.width == 16) - FIXME("Half float type is not supported yet.\n"); + dst->u.reg.u.immconst_uint[0] = record->operands[0]; else if (type->u.width == 32) dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); else if (type->u.width == 64) @@ -3826,7 +3830,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ break; case CAST_ZEXT: case CAST_SEXT: - /* nop or min precision. TODO: native 16-bit */ + /* nop or min precision. TODO: native 16-bit. + * Extension instructions could be emitted for min precision, but in Windows + * the AMD RX 580 simply drops such instructions, which makes sense as no + * assumptions should be made about any behaviour which depends on bit width. */ if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) { op = VKD3DSIH_NOP; diff --git a/tests/hlsl/minimum-precision.shader_test b/tests/hlsl/minimum-precision.shader_test index a93d235ca..bc56720b2 100644 --- a/tests/hlsl/minimum-precision.shader_test +++ b/tests/hlsl/minimum-precision.shader_test @@ -30,16 +30,26 @@ float4 main() : sv_target return float4(u.x + i, u.y + j, 0, 0); }
+[require] +shader model >= 4.0 +shader model < 6.0 + [test] uniform 0 uint4 0 0 0 0 draw quad probe all rgba (32767.0, 65535.0, 0.0, 0.0)
+[require] +shader model >= 6.0 + +[test] +uniform 0 uint4 0 0 0 0 +draw quad +probe all rgba (32767.0, 4.29496730e09, 0.0, 0.0) 1 +
% The code d3dcompiler_47 produces for this appears correct, but the result % is still zero in Windows. -[require] -shader model >= 6.0
[pixel shader] uniform min16uint4 u; @@ -55,7 +65,7 @@ float4 main() : sv_target uniform 0 uint4 0 0 0 0 uniform 4 uint 2 draw quad -probe all rgba (32767.0, 65535.0, 0.0, 0.0) +probe all rgba (32767.0, 4.29496730e09, 0.0, 0.0) 1 uniform 0 uint4 0 0 0 0 uniform 4 uint 0 draw quad