-- v2: vkd3d-shader/dxil: Support 16-bit types. vkd3d-shader/spirv: Introduce HALF and UINT16 types for minimum precision. vkd3d-shader/spirv: Introduce a data_type_is_floating_point() helper function. tests/shader-runner: Add tests for minimum-precision constants. tests/shader-runner: Run Shader Model 6 tests in the crossbuild.
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- tests/shader_runner.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/tests/shader_runner.c b/tests/shader_runner.c index 4847ec75b..76a616a5e 100644 --- a/tests/shader_runner.c +++ b/tests/shader_runner.c @@ -1567,7 +1567,7 @@ out: } #endif
-#if defined(SONAME_LIBDXCOMPILER) && !defined(VKD3D_CROSSTEST) +#if defined(SONAME_LIBDXCOMPILER) || defined(VKD3D_CROSSTEST) static IDxcCompiler3 *dxcompiler_create(void) { DxcCreateInstanceProc create_instance; @@ -1575,8 +1575,14 @@ static IDxcCompiler3 *dxcompiler_create(void) HRESULT hr; void *dll;
+# ifdef VKD3D_CROSSTEST + dll = vkd3d_dlopen("dxcompiler.dll"); + if (!dll) + trace("Failed to load dxcompiler.dll, %s.\n", vkd3d_dlerror()); +# else dll = vkd3d_dlopen(SONAME_LIBDXCOMPILER); ok(dll, "Failed to load dxcompiler library, %s.\n", vkd3d_dlerror()); +# endif if (!dll) return NULL;
@@ -1592,7 +1598,7 @@ static IDxcCompiler3 *dxcompiler_create(void)
return compiler; } -#elif !defined(VKD3D_CROSSTEST) +#else static IDxcCompiler3 *dxcompiler_create(void) { return NULL; @@ -1601,9 +1607,7 @@ static IDxcCompiler3 *dxcompiler_create(void)
START_TEST(shader_runner) { -#ifndef VKD3D_CROSSTEST IDxcCompiler3 *dxc_compiler; -#endif
parse_args(argc, argv);
@@ -1619,6 +1623,14 @@ START_TEST(shader_runner) trace("Compiling shaders with d3dcompiler_47.dll and executing with d3d12.dll\n"); run_shader_tests_d3d12(NULL, SHADER_MODEL_4_0, SHADER_MODEL_5_1);
+ if ((dxc_compiler = dxcompiler_create())) + { + trace("Compiling shaders with dxcompiler.dll and executing with d3d12.dll\n"); + run_shader_tests_d3d12(dxc_compiler, SHADER_MODEL_6_0, SHADER_MODEL_6_0); + IDxcCompiler3_Release(dxc_compiler); + print_dll_version("dxcompiler.dll"); + } + print_dll_version("d3dcompiler_47.dll"); print_dll_version("dxgi.dll"); print_dll_version("d3d9.dll");
From: Conor McCarthy <cmccarthy@codeweavers.com>
Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or should not be sign-extended when 16-bit execution is not supported. --- tests/hlsl/minimum-precision.shader_test | 36 ++++++++++++++++++++++++ 1 file changed, 36 insertions(+)
diff --git a/tests/hlsl/minimum-precision.shader_test b/tests/hlsl/minimum-precision.shader_test index e5053e3d1..f7375bd2c 100644 --- a/tests/hlsl/minimum-precision.shader_test +++ b/tests/hlsl/minimum-precision.shader_test @@ -19,3 +19,39 @@ float4 main() : sv_target [test] draw quad probe all rgba (197.0, 218.0, 238.0, 257.0) + + +[pixel shader] +uniform min16uint2 u; + +float4 main() : sv_target +{ + min16uint i = 0x7fff, j = 0xffff; + return float4(u.x + i, u.y + j, 0, 0); +} + +[test] +uniform 0 uint4 0 0 0 0 +draw quad +probe all rgba (32767.0, 65535.0, 0.0, 0.0) + + +[pixel shader] +uniform min16uint4 u; +uniform uint i; + +float4 main() : sv_target +{ + min16uint arr[4] = {1, 2, 0x7fff, 0xffff}; + return float4(u.x + arr[i], u.y + arr[i + 1], 0, 0); +} + +[test] +uniform 0 uint4 0 0 0 0 +uniform 4 uint 2 +draw quad +probe all rgba (32767.0, 65535.0, 0.0, 0.0) +uniform 0 uint4 0 0 0 0 +uniform 4 uint 0 +draw quad +probe all rgba (1.0, 2.0, 0.0, 0.0)
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d-shader/spirv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 62ee1bef3..29a5fc275 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -219,6 +219,11 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d } }
+static bool data_type_is_floating_point(enum vkd3d_data_type data_type) +{ + return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; +} + static inline bool register_is_undef(const struct vkd3d_shader_register *reg) { return reg->type == VKD3DSPR_UNDEF; @@ -3976,7 +3981,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, uint32_t type_id;
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id);
FIXME("Unhandled data type %#x.\n", reg->data_type); @@ -3990,7 +3995,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, uint32_t type_id;
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); else if (reg->data_type == VKD3D_DATA_INT || reg->data_type == VKD3D_DATA_UINT) return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); @@ -4171,7 +4176,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, }
type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id);
FIXME("Unhandled data type %#x.\n", reg->data_type);
From: Conor McCarthy <cmccarthy@codeweavers.com>
Minimum precision types must always be implemented as 32-bit to match how reduced precision works in SPIR-V. --- libs/vkd3d-shader/spirv.c | 88 +++++++++++++++++++++++- libs/vkd3d-shader/vkd3d_shader_private.h | 8 ++- 2 files changed, 91 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 29a5fc275..c1783ab5a 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -1820,6 +1820,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_SNORM: case VKD3D_DATA_UNORM: @@ -1827,6 +1828,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder break; case VKD3D_DATA_INT: case VKD3D_DATA_UINT: + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); break; case VKD3D_DATA_DOUBLE: @@ -3671,6 +3673,78 @@ static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compil type_id, vector1_id, vector2_id, components, component_count); }
+/* Based on the implementation in the OpenGL Mathematics library. */ +static uint32_t half_to_float(uint16_t value) +{ + uint32_t s = (value & 0x8000) << 16; + uint32_t e = (value >> 10) & 0x1fu; + uint32_t m = value & 0x3ffu; + + if (!e) + { + if (!m) + { + /* Plus or minus zero */ + return s; + } + else + { + /* Denormalized number -- renormalize it */ + + while (!(m & 0x400u)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x400u; + } + } + else if (e == 31u) + { + if (!m) + { + /* Positive or negative infinity */ + return s | 0x7f800000u; + } + else + { + /* Nan -- preserve sign and significand bits */ + return s | 0x7f800000u | (m << 13); + } + } + + /* Normalized number */ + e += 127u - 15u; + m <<= 13; + + /* Assemble s, e and m. */ + return s | (e << 23) | m; +} + +static uint32_t convert_raw_constant32(struct spirv_compiler *compiler, + enum vkd3d_data_type data_type, unsigned int uint_value) +{ + int16_t i; + + /* TODO: native 16-bit support. */ + if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) + return uint_value; + + if (data_type == VKD3D_DATA_HALF) + return half_to_float(uint_value); + + /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or + * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows + * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These + * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not + * extended, and results match SM 5. It seems best to replicate the sign-extension, and if + * execution is 16-bit, the values will be truncated. 
*/ + i = uint_value; + return (int32_t)i; +} + static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask) { @@ -3683,14 +3757,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile if (reg->dimension == VSIR_DIMENSION_SCALAR) { for (i = 0; i < component_count; ++i) - values[i] = *reg->u.immconst_uint; + values[i] = convert_raw_constant32(compiler, reg->data_type, reg->u.immconst_uint[0]); } else { for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) - values[j++] = reg->u.immconst_uint[vkd3d_swizzle_get_component(swizzle, i)]; + values[j++] = convert_raw_constant32(compiler, reg->data_type, + reg->u.immconst_uint[vkd3d_swizzle_get_component(swizzle, i)]); } }
@@ -3821,6 +3896,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil
switch (icb->data_type) { + case VKD3D_DATA_HALF: + case VKD3D_DATA_UINT16: + /* Scalar only. */ + for (i = 0; i < element_count; ++i) + elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, + convert_raw_constant32(compiler, icb->data_type, icb->data[i])); + break; case VKD3D_DATA_FLOAT: case VKD3D_DATA_INT: case VKD3D_DATA_UINT: @@ -6711,7 +6793,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL);
val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_FLOAT) + if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) { val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 74f3e9905..eb5b2b9b7 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -603,6 +603,8 @@ enum vkd3d_data_type VKD3D_DATA_UNUSED, VKD3D_DATA_UINT8, VKD3D_DATA_UINT64, + VKD3D_DATA_UINT16, + VKD3D_DATA_HALF, VKD3D_DATA_BOOL,
VKD3D_DATA_COUNT, @@ -610,8 +612,8 @@ enum vkd3d_data_type
static inline bool data_type_is_integer(enum vkd3d_data_type data_type) { - return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT - || data_type == VKD3D_DATA_UINT64; + return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 + || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; }
static inline bool data_type_is_bool(enum vkd3d_data_type data_type) @@ -1444,10 +1446,12 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_ty { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_UNORM: case VKD3D_DATA_SNORM: return VKD3D_SHADER_COMPONENT_FLOAT; + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_UINT: return VKD3D_SHADER_COMPONENT_UINT; case VKD3D_DATA_INT:
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d-shader/dxil.c | 11 +++++++-- tests/hlsl/minimum-precision.shader_test | 29 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 602056e25..7d13fe0c4 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -2008,6 +2008,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type return VKD3D_DATA_BOOL; case 8: return VKD3D_DATA_UINT8; + case 16: + return VKD3D_DATA_UINT16; case 32: return VKD3D_DATA_UINT; case 64: @@ -2021,6 +2023,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type { switch (type->u.width) { + case 16: + return VKD3D_DATA_HALF; case 32: return VKD3D_DATA_FLOAT; case 64: @@ -2594,7 +2598,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const }
if (type->u.width == 16) - FIXME("Half float type is not supported yet.\n"); + dst->u.reg.u.immconst_uint[0] = record->operands[0]; else if (type->u.width == 32) dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); else if (type->u.width == 64) @@ -3826,7 +3830,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ break; case CAST_ZEXT: case CAST_SEXT: - /* nop or min precision. TODO: native 16-bit */ + /* nop or min precision. TODO: native 16-bit. + * Extension instructions could be emitted for min precision, but in Windows + * the AMD RX 580 simply drops such instructions, which makes sense as no + * assumptions should be made about any behaviour which depends on bit width. */ if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) { op = VKD3DSIH_NOP; diff --git a/tests/hlsl/minimum-precision.shader_test b/tests/hlsl/minimum-precision.shader_test index f7375bd2c..d999787a5 100644 --- a/tests/hlsl/minimum-precision.shader_test +++ b/tests/hlsl/minimum-precision.shader_test @@ -30,11 +30,23 @@ float4 main() : sv_target return float4(u.x + i, u.y + j, 0, 0); }
+[require] +shader model >= 4.0 +shader model < 6.0 + [test] uniform 0 uint4 0 0 0 0 draw quad probe all rgba (32767.0, 65535.0, 0.0, 0.0)
+[require] +shader model >= 6.0 + +[test] +uniform 0 uint4 0 0 0 0 +draw quad +probe all rgba (32767.0, 4.29496730e09, 0.0, 0.0) 1 +
[pixel shader] uniform min16uint4 u; @@ -46,6 +58,10 @@ float4 main() : sv_target return float4(u.x + arr[i], u.y + arr[i + 1], 0, 0); }
+[require] +shader model >= 4.0 +shader model < 6.0 + [test] uniform 0 uint4 0 0 0 0 uniform 4 uint 2 @@ -55,3 +71,16 @@ uniform 0 uint4 0 0 0 0 uniform 4 uint 0 draw quad probe all rgba (1.0, 2.0, 0.0, 0.0) + +[require] +shader model >= 6.0 + +[test] +uniform 0 int4 0 0 0 0 +uniform 4 uint 2 +draw quad +probe all rgba (32767.0, 4.29496730e09, 0.0, 0.0) 1 +uniform 0 int4 0 0 0 0 +uniform 4 uint 0 +draw quad +probe all rgba (1.0, 2.0, 0.0, 0.0)