Goes atop !681. The last 8 commits belong here.
-- v8: vkd3d-shader/spirv: Emit a warning if atomic RMW flags are unhandled. vkd3d-shader/dxil: Implement the DXIL ATOMICRMW instruction. vkd3d-shader/dxil: Implement DX instructions ThreadId, GroupId, ThreadIdInGroup and FlattenedThreadIdInGroup. tests/shader-runner: Add TGSM tests. vkd3d-shader/dxil: Emit an error if a constant code is unhandled. vkd3d-shader/spirv: Support 64-bit register info component type in spirv_compiler_emit_load_reg(). vkd3d-shader/spirv: Do not assert if a TGSM store data register is not UINT. vkd3d-shader/spirv: Do not assert if a TGSM load dst register is not UINT.
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index d1073884a..b64fe93c1 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -8785,7 +8785,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } - assert(dst->reg.data_type == VKD3D_DATA_UINT); spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index b64fe93c1..f1a436723 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -8906,7 +8906,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0);
data = &src[instruction->src_count - 1]; - assert(data->reg.data_type == VKD3D_DATA_UINT); val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask);
component_count = vsir_write_mask_component_count(dst->write_mask);
From: Conor McCarthy cmccarthy@codeweavers.com
For 64-bit indexable temps (and any other 64-bit declarations) the write mask must not be converted. --- libs/vkd3d-shader/spirv.c | 10 +++++----- libs/vkd3d-shader/vkd3d_shader_private.h | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index f1a436723..f2c40e921 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4094,7 +4094,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, struct vkd3d_shader_register_info reg_info; unsigned int component_count; uint32_t type_id, val_id; - uint32_t write_mask32; + uint32_t val_write_mask;
if (reg->type == VKD3DSPR_IMMCONST) return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); @@ -4114,17 +4114,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); return vkd3d_spirv_get_op_undef(builder, type_id); } - assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); spirv_compiler_emit_dereference_register(compiler, reg, ®_info);
- write_mask32 = data_type_is_64_bit(reg->data_type) ? vsir_write_mask_32_from_64(write_mask) : write_mask; + val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) + ? vsir_write_mask_32_from_64(write_mask) : write_mask;
/* Intermediate value (no storage class). */ if (reg_info.storage_class == SpvStorageClassMax) { val_id = reg_info.id; } - else if (vsir_write_mask_component_count(write_mask32) == 1) + else if (vsir_write_mask_component_count(val_write_mask) == 1) { return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, ®_info); } @@ -4137,7 +4137,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; val_id = spirv_compiler_emit_swizzle(compiler, - val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); + val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask);
if (component_type != reg_info.component_type) { diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 57065bac4..f72c867b6 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1609,6 +1609,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc } }
+static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) +{ + return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index);
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 7f9a74fa7..b3e42c158 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -2960,6 +2960,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
default: FIXME("Unhandled constant code %u.\n", record->code); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Constant code %u is unhandled.", record->code); dst->u.reg.type = VKD3DSPR_UNDEF; break; }
From: Conor McCarthy cmccarthy@codeweavers.com
--- Makefile.am | 1 + tests/hlsl/tgsm.shader_test | 135 ++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 tests/hlsl/tgsm.shader_test
diff --git a/Makefile.am b/Makefile.am index 8abe08638..9795bdf1a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -200,6 +200,7 @@ vkd3d_shader_tests = \ tests/hlsl/texture-load-typed.shader_test \ tests/hlsl/texture-load.shader_test \ tests/hlsl/texture-ordering.shader_test \ + tests/hlsl/tgsm.shader_test \ tests/hlsl/transpose.shader_test \ tests/hlsl/trigonometry.shader_test \ tests/hlsl/trunc.shader_test \ diff --git a/tests/hlsl/tgsm.shader_test b/tests/hlsl/tgsm.shader_test new file mode 100644 index 000000000..a1b7259e9 --- /dev/null +++ b/tests/hlsl/tgsm.shader_test @@ -0,0 +1,135 @@ +[require] +shader model >= 5.0 + +[uav 1] +format r32 uint +size (buffer, 4) + +1 0 0 0 + +[compute shader todo] +RWByteAddressBuffer u : register(u1); +groupshared uint m; + + [numthreads(32, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + if (!local_idx) + m = group_id.x; + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m, group_id.x); + GroupMemoryBarrierWithGroupSync(); + if (!local_idx) + u.Store(4 * group_id.x, m); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (0) +probe uav 1 (1) rui (33) +probe uav 1 (2) rui (66) +probe uav 1 (3) rui (99) + + +[uav 1] +format r32 sint +size (buffer, 4) + +1 0 0 0 + +[compute shader todo] +RWByteAddressBuffer u : register(u1); +groupshared int m; + + [numthreads(32, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + if (!local_idx) + m = group_id.x; + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m, -group_id.x); + GroupMemoryBarrierWithGroupSync(); + if (!local_idx) + u.Store(4 * group_id.x, m); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) ri (0) +probe uav 1 (1) ri (-31) +probe uav 1 (2) ri (-62) +probe uav 1 (3) ri (-93) + + +[uav 1] +format r32 float +size (buffer, 8) + +1 1 1 1 0 0 0 0 + +[uav 2] +format r32 sint +size (buffer, 8) + +1 1 1 1 0 0 0 0 + +[compute shader todo] +uniform uint idx; + +#define GROUP_SIZE 4 + +struct data +{ + float 
f; + uint u; +}; + +RWBuffer<float> u : register(u1); +RWBuffer<uint> u2 : register(u2); +groupshared data m[GROUP_SIZE]; + + [numthreads(GROUP_SIZE, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID, + uint thread_id : SV_DispatchThreadID) +{ + uint i; + if (!local_idx) + { + for (i = 0; i < GROUP_SIZE; ++i) + { + m[i].f = group_id.x; + m[i].u = group_id.x; + } + } + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m[0].u, 2); + InterlockedAdd(m[idx].u, 1); + GroupMemoryBarrierWithGroupSync(); + for (i = 0; i < local_idx; ++i) + { + m[local_idx].f += group_id.x; + m[local_idx].u += group_id.x; + } + u[thread_id.x] = m[local_idx].f; + u2[thread_id.x] = m[local_idx].u; +} + +[test] +uniform 0 uint 1 +todo dispatch 2 1 1 +probe uav 1 (0) r (0.0) +probe uav 1 (1) r (0.0) +probe uav 1 (2) r (0.0) +probe uav 1 (3) r (0.0) +probe uav 1 (4) r (1.0) +probe uav 1 (5) r (2.0) +probe uav 1 (6) r (3.0) +probe uav 1 (7) r (4.0) +probe uav 2 (0) ri (8) +probe uav 2 (1) ri (4) +probe uav 2 (2) ri (0) +probe uav 2 (3) ri (0) +probe uav 2 (4) ri (9) +probe uav 2 (5) ri (6) +probe uav 2 (6) ri (3) +probe uav 2 (7) ri (4)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 69 +++++++++++++++++++++++++++++++ tests/hlsl/numthreads.shader_test | 10 +++-- 2 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index b3e42c158..62cb82616 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -405,6 +405,10 @@ enum dx_intrinsic_opcode DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, DX_DERIV_FINEY = 86, + DX_THREAD_ID = 93, + DX_GROUP_ID = 94, + DX_THREAD_ID_IN_GROUP = 95, + DX_FLATTENED_THREAD_ID_IN_GROUP = 96, DX_SPLIT_DOUBLE = 102, DX_LEGACY_F32TOF16 = 130, DX_LEGACY_F16TOF32 = 131, @@ -758,6 +762,7 @@ struct sm6_parser
struct vkd3d_shader_dst_param *output_params; struct vkd3d_shader_dst_param *input_params; + uint64_t input_reg_declarations;
struct sm6_function *functions; size_t function_count; @@ -4275,6 +4280,24 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int ins->handler_idx = VKD3DSIH_NOP; }
+static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, + enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int component_count) +{ + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + uint64_t reg_flag; + + reg_flag = 1ull << reg_type; + if (!(sm6->input_reg_declarations & reg_flag)) + { + sm6->input_reg_declarations |= reg_flag; + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_INPUT); + dst_param = &ins->declaration.dst; + vsir_register_init(&dst_param->reg, reg_type, data_type, 0); + dst_param_init_vector(dst_param, component_count); + } +} + static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4290,6 +4313,48 @@ static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_op instruction_dst_param_init_ssa_scalar(ins, sm6); }
+static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + unsigned int component_count = 3, component_idx = 0; + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum vkd3d_shader_register_type reg_type; + + switch (op) + { + case DX_THREAD_ID: + reg_type = VKD3DSPR_THREADID; + break; + case DX_GROUP_ID: + reg_type = VKD3DSPR_THREADGROUPID; + break; + case DX_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADID; + break; + case DX_FLATTENED_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADINDEX; + component_count = 1; + break; + default: + vkd3d_unreachable(); + } + + sm6_parser_dcl_register_builtin(sm6, reg_type, VKD3D_DATA_UINT, component_count); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + vsir_register_init(&src_param->reg, reg_type, VKD3D_DATA_UINT, 0); + if (component_count > 1) + { + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + component_idx = sm6_value_get_constant_uint(operands[0]); + } + src_param_init_scalar(src_param, component_idx); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) { switch (op) @@ -5093,12 +5158,14 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_FLATTENED_THREAD_ID_IN_GROUP ] = {"i", "", sm6_parser_emit_dx_compute_builtin}, [DX_FMA ] = {"g", "RRR", sm6_parser_emit_dx_ma}, [DX_FMAD ] = {"g", "RRR", sm6_parser_emit_dx_ma}, [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FRC ] = {"g", "R", 
sm6_parser_emit_dx_unary}, [DX_GET_DIMENSIONS ] = {"D", "Hi", sm6_parser_emit_dx_get_dimensions}, + [DX_GROUP_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_IBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -5136,6 +5203,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, + [DX_THREAD_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, + [DX_THREAD_ID_IN_GROUP ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, diff --git a/tests/hlsl/numthreads.shader_test b/tests/hlsl/numthreads.shader_test index e8f7900bf..ceeb40e3e 100644 --- a/tests/hlsl/numthreads.shader_test +++ b/tests/hlsl/numthreads.shader_test @@ -172,7 +172,7 @@ size (2d, 2, 2) 1.0 1.0
[compute shader] -/* Attributes are taken from the first function, and dropped from the second. */ +/* In SM < 6.0, attributes are taken from the first function, and dropped from the second. */ RWTexture2D<float> u;
[numthreads(2, 1, 1)] @@ -185,8 +185,10 @@ void main(uint2 id : sv_dispatchthreadid) }
[test] -todo(sm>=6) dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) r (2.0) -probe uav 0 (0, 1) r (1.0) -probe uav 0 (1, 0) r (2.0) +if(sm<6) probe uav 0 (0, 1) r (1.0) +if(sm<6) probe uav 0 (1, 0) r (2.0) probe uav 0 (1, 1) r (1.0) +if(sm>=6) probe uav 0 (0, 1) r (2.0) +if(sm>=6) probe uav 0 (1, 0) r (1.0)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/d3d_asm.c | 30 ++++ libs/vkd3d-shader/dxil.c | 184 +++++++++++++++++++++++ libs/vkd3d-shader/vkd3d_shader_private.h | 6 + tests/hlsl/tgsm.shader_test | 4 +- 4 files changed, 222 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d-shader/d3d_asm.c index 0623a129e..f7a64f7e8 100644 --- a/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d-shader/d3d_asm.c @@ -448,6 +448,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); }
+static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) +{ + if (atomic_flags & VKD3DARF_SEQ_CST) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_seq_cst"); + atomic_flags &= ~VKD3DARF_SEQ_CST; + } + if (atomic_flags & VKD3DARF_VOLATILE) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); + atomic_flags &= ~VKD3DARF_VOLATILE; + } + + if (atomic_flags) + vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags); +} + static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) { if (sync_flags & VKD3DSSF_GLOBAL_UAV) @@ -1734,6 +1751,19 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile } break;
+ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: + case VKD3DSIH_IMM_ATOMIC_IADD: + case VKD3DSIH_IMM_ATOMIC_AND: + case VKD3DSIH_IMM_ATOMIC_IMAX: + case VKD3DSIH_IMM_ATOMIC_IMIN: + case VKD3DSIH_IMM_ATOMIC_OR: + case VKD3DSIH_IMM_ATOMIC_UMAX: + case VKD3DSIH_IMM_ATOMIC_UMIN: + case VKD3DSIH_IMM_ATOMIC_EXCH: + case VKD3DSIH_IMM_ATOMIC_XOR: + shader_dump_atomic_op_flags(compiler, ins->flags); + break; + case VKD3DSIH_SYNC: shader_dump_sync_flags(compiler, ins->flags); break; diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 62cb82616..03c30c3dc 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -463,6 +463,32 @@ enum dxil_predicate ICMP_SLE = 41, };
+enum dxil_rmw_code +{ + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10, +}; + +enum dxil_atomic_ordering +{ + ORDERING_NOTATOMIC = 0, + ORDERING_UNORDERED = 1, + ORDERING_MONOTONIC = 2, + ORDERING_ACQUIRE = 3, + ORDERING_RELEASE = 4, + ORDERING_ACQREL = 5, + ORDERING_SEQCST = 6, +}; + enum dxil_atomic_binop_code { ATOMIC_BINOP_ADD, @@ -2593,6 +2619,30 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 return true; }
+static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) + { + WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer."); + return false; + } + return true; +} + +static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_i32(value->type)) + { + WARN("Operand result type %u is not i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 operand passed to a DXIL instruction is not an int32."); + return false; + } + return true; +} + static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) { if (idx < sm6->value_count) @@ -3507,6 +3557,9 @@ struct function_emission_state unsigned int temp_idx; };
+static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, + unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg); + static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { @@ -3582,6 +3635,130 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); }
+static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) +{ + switch (code) + { + case RMW_ADD: + return VKD3DSIH_IMM_ATOMIC_IADD; + case RMW_AND: + return VKD3DSIH_IMM_ATOMIC_AND; + case RMW_MAX: + return VKD3DSIH_IMM_ATOMIC_IMAX; + case RMW_MIN: + return VKD3DSIH_IMM_ATOMIC_IMIN; + case RMW_OR: + return VKD3DSIH_IMM_ATOMIC_OR; + case RMW_UMAX: + return VKD3DSIH_IMM_ATOMIC_UMAX; + case RMW_UMIN: + return VKD3DSIH_IMM_ATOMIC_UMIN; + case RMW_XCHG: + return VKD3DSIH_IMM_ATOMIC_EXCH; + case RMW_XOR: + return VKD3DSIH_IMM_ATOMIC_XOR; + default: + /* DXIL currently doesn't use SUB and NAND. */ + return VKD3DSIH_INVALID; + } +} + +static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, + struct function_emission_state *state, struct sm6_value *dst) +{ + struct vkd3d_shader_register coord, const_offset, const_zero; + const struct vkd3d_shader_register *regs[2]; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + const struct sm6_value *ptr, *src; + enum vkd3d_shader_opcode op; + unsigned int i = 0; + bool is_volatile; + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) + || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) + return; + + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) + { + WARN("Register is not groupshared.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The destination register for an atomicrmw instruction is not groupshared memory."); + return; + } + + dst->type = ptr->type->u.pointer.type; + + if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i)) + || !sm6_value_validate_is_i32(src, sm6)) + return; + + if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6)) + return; + + if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID) + { + FIXME("Unhandled atomicrmw op %"PRIu64".\n", code); + 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code); + return; + } + + is_volatile = record->operands[i++]; + + /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ + if ((code = record->operands[i++]) != ORDERING_SEQCST) + FIXME("Unhandled atomic ordering %"PRIu64".\n", code); + + if ((code = record->operands[i]) != 1) + WARN("Ignoring synchronisation scope %"PRIu64".\n", code); + + if (ptr->structure_stride) + { + if (ptr->u.reg.idx[1].rel_addr) + { + regs[0] = &ptr->u.reg.idx[1].rel_addr->reg; + } + else + { + register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset); + regs[0] = &const_offset; + } + register_make_constant_uint(&const_zero, 0); + regs[1] = &const_zero; + if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) + return; + } + + ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, op); + ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; + + if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) + return; + if (ptr->structure_stride) + src_param_init_vector_from_reg(&src_params[0], &coord); + else + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[1], src); + + dst_params = instruction_dst_params_alloc(ins, 2, sm6); + register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); + dst_param_init(&dst_params[0]); + + dst_params[1].reg = ptr->u.reg; + /* The groupshared register has data type UAV when accessed. 
*/ + dst_params[1].reg.data_type = VKD3D_DATA_UAV; + dst_params[1].reg.idx[1].rel_addr = NULL; + dst_params[1].reg.idx[1].offset = ~0u; + dst_params[1].reg.idx_count = 1; + dst_param_init(&dst_params[1]); + + dst->u.reg = dst_params[0].reg; +} + static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, const struct sm6_type *type_b, struct sm6_parser *sm6) { @@ -6678,6 +6855,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const case FUNC_CODE_INST_ALLOCA: sm6_parser_emit_alloca(sm6, record, ins, dst); break; + case FUNC_CODE_INST_ATOMICRMW: + { + struct function_emission_state state = {code_block, ins}; + sm6_parser_emit_atomicrmw(sm6, record, &state, dst); + sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); + break; + } case FUNC_CODE_INST_BINOP: sm6_parser_emit_binop(sm6, record, ins, dst); break; diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index f72c867b6..84979f0cc 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -755,6 +755,12 @@ enum vkd3d_shader_uav_flags VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, };
+enum vkd3d_shader_atomic_rmw_flags +{ + VKD3DARF_SEQ_CST = 0x1, + VKD3DARF_VOLATILE = 0x2, +}; + enum vkd3d_tessellator_domain { VKD3D_TESSELLATOR_DOMAIN_LINE = 1, diff --git a/tests/hlsl/tgsm.shader_test b/tests/hlsl/tgsm.shader_test index a1b7259e9..e6b1a6858 100644 --- a/tests/hlsl/tgsm.shader_test +++ b/tests/hlsl/tgsm.shader_test @@ -24,7 +24,7 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) }
[test] -todo dispatch 4 1 1 +todo(sm<6) dispatch 4 1 1 probe uav 1 (0) rui (0) probe uav 1 (1) rui (33) probe uav 1 (2) rui (66) @@ -54,7 +54,7 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) }
[test] -todo dispatch 4 1 1 +todo(sm<6) dispatch 4 1 1 probe uav 1 (0) ri (0) probe uav 1 (1) ri (-31) probe uav 1 (2) ri (-62)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index f2c40e921..d3eeb3eb9 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -9248,6 +9248,11 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil
val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type);
+ if (instruction->flags & VKD3DARF_SEQ_CST) + WARN("Ignoring sequentially consistent ordering.\n"); + if (instruction->flags & VKD3DARF_VOLATILE) + WARN("Ignoring 'volatile' attribute.\n"); + operands[i++] = pointer_id; operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone);
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/d3d_asm.c:
vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags);
}
+static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags)
+{
+    if (atomic_flags & VKD3DARF_SEQ_CST)
+    {
+        vkd3d_string_buffer_printf(&compiler->buffer, "_seq_cst");
If we are inventing this, can we use `_seqCst` to make the language a tad more regular?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/dxil.c:
return true;
}
+static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6)
+{
+    if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type))
+    {
+        WARN("Operand result type %u is not a pointer to i32.\n", value->type->class);
+        vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+                "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer.");
I see there are a few already, but I find this type of error message a bit confusing. What does it mean that an int32 pointer operand is not an int32 pointer? Maybe it should be something like "Expected an int32 pointer, got something else"?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/dxil.c:
ins->handler_idx = VKD3DSIH_NOP;
}
+static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6,
+        enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int component_count)
+{
+    struct vkd3d_shader_dst_param *dst_param;
+    struct vkd3d_shader_instruction *ins;
+    uint64_t reg_flag;
+
+    reg_flag = 1ull << reg_type;
So far it's fine, but we're not far away from having 64 register types. Could we statically assert that `VKD3DSPR_COUNT < sizeof(unsigned long long) * CHAR_BIT`?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/dxil.c:
+    if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))
+            || !sm6_value_validate_is_pointer_to_i32(ptr, sm6))
+        return;
+
+    if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM)
+    {
+        WARN("Register is not groupshared.\n");
+        vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+                "The destination register for an atomicrmw instruction is not groupshared memory.");
+        return;
+    }
+
+    dst->type = ptr->type->u.pointer.type;
+
+    if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i))
+            || !sm6_value_validate_is_i32(src, sm6))
I wonder whether this is useful, given that we already checked the pointer type and in `sm6_parser_get_value_idx_by_ref()` we check that the value has the expected type.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/spirv.c:
val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type);
+    if (instruction->flags & VKD3DARF_SEQ_CST)
+        WARN("Ignoring sequentially consistent ordering.\n");
+    if (instruction->flags & VKD3DARF_VOLATILE)
+        WARN("Ignoring 'volatile' attribute.\n");
Do you plan to add them shortly? Sequential consistency should be pretty easy, you just have to add the appropriate flag.
Volatile seems a bit more complicated: it requires the Vulkan memory model, which in turn is incompatible with sequential consistency. So I don't know what's the solution for this. I wonder whether volatile accesses are used often in practice.
This merge request was approved by Giovanni Mascellani.