-- v9: vkd3d-shader/spirv: Handle the sequentially consistent ordering flag for atomic instructions. vkd3d-shader/spirv: Emit a warning if the atomic instruction volatile flag is unhandled. vkd3d-shader/dxil: Implement the DXIL ATOMICRMW instruction. vkd3d-shader/dxil: Implement DX instructions ThreadId, GroupId, ThreadIdInGroup and FlattenedThreadIdInGroup.
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index d1073884a..b64fe93c1 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -8785,7 +8785,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } - assert(dst->reg.data_type == VKD3D_DATA_UINT); spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index b64fe93c1..f1a436723 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -8906,7 +8906,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0);
data = &src[instruction->src_count - 1]; - assert(data->reg.data_type == VKD3D_DATA_UINT); val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask);
component_count = vsir_write_mask_component_count(dst->write_mask);
From: Conor McCarthy cmccarthy@codeweavers.com
For 64-bit indexable temps (and any other 64-bit declarations) the write mask must not be converted. --- libs/vkd3d-shader/spirv.c | 10 +++++----- libs/vkd3d-shader/vkd3d_shader_private.h | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index f1a436723..f2c40e921 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4094,7 +4094,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, struct vkd3d_shader_register_info reg_info; unsigned int component_count; uint32_t type_id, val_id; - uint32_t write_mask32; + uint32_t val_write_mask;
if (reg->type == VKD3DSPR_IMMCONST) return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); @@ -4114,17 +4114,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); return vkd3d_spirv_get_op_undef(builder, type_id); } - assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); spirv_compiler_emit_dereference_register(compiler, reg, &reg_info);
- write_mask32 = data_type_is_64_bit(reg->data_type) ? vsir_write_mask_32_from_64(write_mask) : write_mask; + val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) + ? vsir_write_mask_32_from_64(write_mask) : write_mask;
/* Intermediate value (no storage class). */ if (reg_info.storage_class == SpvStorageClassMax) { val_id = reg_info.id; } - else if (vsir_write_mask_component_count(write_mask32) == 1) + else if (vsir_write_mask_component_count(val_write_mask) == 1) { return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, &reg_info); } @@ -4137,7 +4137,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; val_id = spirv_compiler_emit_swizzle(compiler, - val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); + val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask);
if (component_type != reg_info.component_type) { diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 57065bac4..f72c867b6 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1609,6 +1609,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc } }
+static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) +{ + return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index);
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 7f9a74fa7..b3e42c158 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -2960,6 +2960,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
default: FIXME("Unhandled constant code %u.\n", record->code); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Constant code %u is unhandled.", record->code); dst->u.reg.type = VKD3DSPR_UNDEF; break; }
From: Conor McCarthy cmccarthy@codeweavers.com
--- Makefile.am | 1 + tests/hlsl/tgsm.shader_test | 135 ++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 tests/hlsl/tgsm.shader_test
diff --git a/Makefile.am b/Makefile.am index 8abe08638..9795bdf1a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -200,6 +200,7 @@ vkd3d_shader_tests = \ tests/hlsl/texture-load-typed.shader_test \ tests/hlsl/texture-load.shader_test \ tests/hlsl/texture-ordering.shader_test \ + tests/hlsl/tgsm.shader_test \ tests/hlsl/transpose.shader_test \ tests/hlsl/trigonometry.shader_test \ tests/hlsl/trunc.shader_test \ diff --git a/tests/hlsl/tgsm.shader_test b/tests/hlsl/tgsm.shader_test new file mode 100644 index 000000000..a1b7259e9 --- /dev/null +++ b/tests/hlsl/tgsm.shader_test @@ -0,0 +1,135 @@ +[require] +shader model >= 5.0 + +[uav 1] +format r32 uint +size (buffer, 4) + +1 0 0 0 + +[compute shader todo] +RWByteAddressBuffer u : register(u1); +groupshared uint m; + + [numthreads(32, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + if (!local_idx) + m = group_id.x; + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m, group_id.x); + GroupMemoryBarrierWithGroupSync(); + if (!local_idx) + u.Store(4 * group_id.x, m); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (0) +probe uav 1 (1) rui (33) +probe uav 1 (2) rui (66) +probe uav 1 (3) rui (99) + + +[uav 1] +format r32 sint +size (buffer, 4) + +1 0 0 0 + +[compute shader todo] +RWByteAddressBuffer u : register(u1); +groupshared int m; + + [numthreads(32, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + if (!local_idx) + m = group_id.x; + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m, -group_id.x); + GroupMemoryBarrierWithGroupSync(); + if (!local_idx) + u.Store(4 * group_id.x, m); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) ri (0) +probe uav 1 (1) ri (-31) +probe uav 1 (2) ri (-62) +probe uav 1 (3) ri (-93) + + +[uav 1] +format r32 float +size (buffer, 8) + +1 1 1 1 0 0 0 0 + +[uav 2] +format r32 sint +size (buffer, 8) + +1 1 1 1 0 0 0 0 + +[compute shader todo] +uniform uint idx; + +#define GROUP_SIZE 4 + +struct data +{ + float 
f; + uint u; +}; + +RWBuffer<float> u : register(u1); +RWBuffer<uint> u2 : register(u2); +groupshared data m[GROUP_SIZE]; + + [numthreads(GROUP_SIZE, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID, + uint thread_id : SV_DispatchThreadID) +{ + uint i; + if (!local_idx) + { + for (i = 0; i < GROUP_SIZE; ++i) + { + m[i].f = group_id.x; + m[i].u = group_id.x; + } + } + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m[0].u, 2); + InterlockedAdd(m[idx].u, 1); + GroupMemoryBarrierWithGroupSync(); + for (i = 0; i < local_idx; ++i) + { + m[local_idx].f += group_id.x; + m[local_idx].u += group_id.x; + } + u[thread_id.x] = m[local_idx].f; + u2[thread_id.x] = m[local_idx].u; +} + +[test] +uniform 0 uint 1 +todo dispatch 2 1 1 +probe uav 1 (0) r (0.0) +probe uav 1 (1) r (0.0) +probe uav 1 (2) r (0.0) +probe uav 1 (3) r (0.0) +probe uav 1 (4) r (1.0) +probe uav 1 (5) r (2.0) +probe uav 1 (6) r (3.0) +probe uav 1 (7) r (4.0) +probe uav 2 (0) ri (8) +probe uav 2 (1) ri (4) +probe uav 2 (2) ri (0) +probe uav 2 (3) ri (0) +probe uav 2 (4) ri (9) +probe uav 2 (5) ri (6) +probe uav 2 (6) ri (3) +probe uav 2 (7) ri (4)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 67 +++++++++++++++++++++++++++++++ tests/hlsl/numthreads.shader_test | 10 +++-- 2 files changed, 73 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index b3e42c158..183a2ca17 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -405,6 +405,10 @@ enum dx_intrinsic_opcode DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, DX_DERIV_FINEY = 86, + DX_THREAD_ID = 93, + DX_GROUP_ID = 94, + DX_THREAD_ID_IN_GROUP = 95, + DX_FLATTENED_THREAD_ID_IN_GROUP = 96, DX_SPLIT_DOUBLE = 102, DX_LEGACY_F32TOF16 = 130, DX_LEGACY_F16TOF32 = 131, @@ -758,6 +762,7 @@ struct sm6_parser
struct vkd3d_shader_dst_param *output_params; struct vkd3d_shader_dst_param *input_params; + uint32_t input_regs_declared[(VKD3DSPR_COUNT + 0x1f) / 0x20];
struct sm6_function *functions; size_t function_count; @@ -4275,6 +4280,22 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int ins->handler_idx = VKD3DSIH_NOP; }
+static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, + enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int component_count) +{ + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + if (!bitmap_is_set(sm6->input_regs_declared, reg_type)) + { + bitmap_set(sm6->input_regs_declared, reg_type); + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_INPUT); + dst_param = &ins->declaration.dst; + vsir_register_init(&dst_param->reg, reg_type, data_type, 0); + dst_param_init_vector(dst_param, component_count); + } +} + static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4290,6 +4311,48 @@ static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_op instruction_dst_param_init_ssa_scalar(ins, sm6); }
+static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + unsigned int component_count = 3, component_idx = 0; + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum vkd3d_shader_register_type reg_type; + + switch (op) + { + case DX_THREAD_ID: + reg_type = VKD3DSPR_THREADID; + break; + case DX_GROUP_ID: + reg_type = VKD3DSPR_THREADGROUPID; + break; + case DX_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADID; + break; + case DX_FLATTENED_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADINDEX; + component_count = 1; + break; + default: + vkd3d_unreachable(); + } + + sm6_parser_dcl_register_builtin(sm6, reg_type, VKD3D_DATA_UINT, component_count); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + vsir_register_init(&src_param->reg, reg_type, VKD3D_DATA_UINT, 0); + if (component_count > 1) + { + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + component_idx = sm6_value_get_constant_uint(operands[0]); + } + src_param_init_scalar(src_param, component_idx); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) { switch (op) @@ -5093,12 +5156,14 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_FLATTENED_THREAD_ID_IN_GROUP ] = {"i", "", sm6_parser_emit_dx_compute_builtin}, [DX_FMA ] = {"g", "RRR", sm6_parser_emit_dx_ma}, [DX_FMAD ] = {"g", "RRR", sm6_parser_emit_dx_ma}, [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FRC ] = {"g", "R", 
sm6_parser_emit_dx_unary}, [DX_GET_DIMENSIONS ] = {"D", "Hi", sm6_parser_emit_dx_get_dimensions}, + [DX_GROUP_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_IBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -5136,6 +5201,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, + [DX_THREAD_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, + [DX_THREAD_ID_IN_GROUP ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, diff --git a/tests/hlsl/numthreads.shader_test b/tests/hlsl/numthreads.shader_test index e8f7900bf..ceeb40e3e 100644 --- a/tests/hlsl/numthreads.shader_test +++ b/tests/hlsl/numthreads.shader_test @@ -172,7 +172,7 @@ size (2d, 2, 2) 1.0 1.0
[compute shader] -/* Attributes are taken from the first function, and dropped from the second. */ +/* In SM < 6.0, attributes are taken from the first function, and dropped from the second. */ RWTexture2D<float> u;
[numthreads(2, 1, 1)] @@ -185,8 +185,10 @@ void main(uint2 id : sv_dispatchthreadid) }
[test] -todo(sm>=6) dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) r (2.0) -probe uav 0 (0, 1) r (1.0) -probe uav 0 (1, 0) r (2.0) +if(sm<6) probe uav 0 (0, 1) r (1.0) +if(sm<6) probe uav 0 (1, 0) r (2.0) probe uav 0 (1, 1) r (1.0) +if(sm>=6) probe uav 0 (0, 1) r (2.0) +if(sm>=6) probe uav 0 (1, 0) r (1.0)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/d3d_asm.c | 30 ++++ libs/vkd3d-shader/dxil.c | 171 +++++++++++++++++++++++ libs/vkd3d-shader/vkd3d_shader_private.h | 6 + tests/hlsl/tgsm.shader_test | 4 +- 4 files changed, 209 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d-shader/d3d_asm.c index 0623a129e..2b5feb941 100644 --- a/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d-shader/d3d_asm.c @@ -448,6 +448,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); }
+static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) +{ + if (atomic_flags & VKD3DARF_SEQ_CST) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_seqCst"); + atomic_flags &= ~VKD3DARF_SEQ_CST; + } + if (atomic_flags & VKD3DARF_VOLATILE) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); + atomic_flags &= ~VKD3DARF_VOLATILE; + } + + if (atomic_flags) + vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags); +} + static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) { if (sync_flags & VKD3DSSF_GLOBAL_UAV) @@ -1734,6 +1751,19 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile } break;
+ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: + case VKD3DSIH_IMM_ATOMIC_IADD: + case VKD3DSIH_IMM_ATOMIC_AND: + case VKD3DSIH_IMM_ATOMIC_IMAX: + case VKD3DSIH_IMM_ATOMIC_IMIN: + case VKD3DSIH_IMM_ATOMIC_OR: + case VKD3DSIH_IMM_ATOMIC_UMAX: + case VKD3DSIH_IMM_ATOMIC_UMIN: + case VKD3DSIH_IMM_ATOMIC_EXCH: + case VKD3DSIH_IMM_ATOMIC_XOR: + shader_dump_atomic_op_flags(compiler, ins->flags); + break; + case VKD3DSIH_SYNC: shader_dump_sync_flags(compiler, ins->flags); break; diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 183a2ca17..8db857fae 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -463,6 +463,32 @@ enum dxil_predicate ICMP_SLE = 41, };
+enum dxil_rmw_code +{ + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10, +}; + +enum dxil_atomic_ordering +{ + ORDERING_NOTATOMIC = 0, + ORDERING_UNORDERED = 1, + ORDERING_MONOTONIC = 2, + ORDERING_ACQUIRE = 3, + ORDERING_RELEASE = 4, + ORDERING_ACQREL = 5, + ORDERING_SEQCST = 6, +}; + enum dxil_atomic_binop_code { ATOMIC_BINOP_ADD, @@ -2593,6 +2619,18 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 return true; }
+static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) + { + WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer."); + return false; + } + return true; +} + static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) { if (idx < sm6->value_count) @@ -3507,6 +3545,9 @@ struct function_emission_state unsigned int temp_idx; };
+static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, + unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg); + static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { @@ -3582,6 +3623,129 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); }
+static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) +{ + switch (code) + { + case RMW_ADD: + return VKD3DSIH_IMM_ATOMIC_IADD; + case RMW_AND: + return VKD3DSIH_IMM_ATOMIC_AND; + case RMW_MAX: + return VKD3DSIH_IMM_ATOMIC_IMAX; + case RMW_MIN: + return VKD3DSIH_IMM_ATOMIC_IMIN; + case RMW_OR: + return VKD3DSIH_IMM_ATOMIC_OR; + case RMW_UMAX: + return VKD3DSIH_IMM_ATOMIC_UMAX; + case RMW_UMIN: + return VKD3DSIH_IMM_ATOMIC_UMIN; + case RMW_XCHG: + return VKD3DSIH_IMM_ATOMIC_EXCH; + case RMW_XOR: + return VKD3DSIH_IMM_ATOMIC_XOR; + default: + /* DXIL currently doesn't use SUB and NAND. */ + return VKD3DSIH_INVALID; + } +} + +static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, + struct function_emission_state *state, struct sm6_value *dst) +{ + struct vkd3d_shader_register coord, const_offset, const_zero; + const struct vkd3d_shader_register *regs[2]; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + const struct sm6_value *ptr, *src; + enum vkd3d_shader_opcode op; + unsigned int i = 0; + bool is_volatile; + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) + || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) + return; + + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) + { + WARN("Register is not groupshared.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The destination register for an atomicrmw instruction is not groupshared memory."); + return; + } + + dst->type = ptr->type->u.pointer.type; + + if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i))) + return; + + if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6)) + return; + + if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID) + { + FIXME("Unhandled atomicrmw op %"PRIu64".\n", code); + vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code); + return; + } + + is_volatile = record->operands[i++]; + + /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ + if ((code = record->operands[i++]) != ORDERING_SEQCST) + FIXME("Unhandled atomic ordering %"PRIu64".\n", code); + + if ((code = record->operands[i]) != 1) + WARN("Ignoring synchronisation scope %"PRIu64".\n", code); + + if (ptr->structure_stride) + { + if (ptr->u.reg.idx[1].rel_addr) + { + regs[0] = &ptr->u.reg.idx[1].rel_addr->reg; + } + else + { + register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset); + regs[0] = &const_offset; + } + register_make_constant_uint(&const_zero, 0); + regs[1] = &const_zero; + if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) + return; + } + + ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, op); + ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; + + if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) + return; + if (ptr->structure_stride) + src_param_init_vector_from_reg(&src_params[0], &coord); + else + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[1], src); + + dst_params = instruction_dst_params_alloc(ins, 2, sm6); + register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); + dst_param_init(&dst_params[0]); + + dst_params[1].reg = ptr->u.reg; + /* The groupshared register has data type UAV when accessed. 
*/ + dst_params[1].reg.data_type = VKD3D_DATA_UAV; + dst_params[1].reg.idx[1].rel_addr = NULL; + dst_params[1].reg.idx[1].offset = ~0u; + dst_params[1].reg.idx_count = 1; + dst_param_init(&dst_params[1]); + + dst->u.reg = dst_params[0].reg; +} + static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, const struct sm6_type *type_b, struct sm6_parser *sm6) { @@ -6676,6 +6840,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const case FUNC_CODE_INST_ALLOCA: sm6_parser_emit_alloca(sm6, record, ins, dst); break; + case FUNC_CODE_INST_ATOMICRMW: + { + struct function_emission_state state = {code_block, ins}; + sm6_parser_emit_atomicrmw(sm6, record, &state, dst); + sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); + break; + } case FUNC_CODE_INST_BINOP: sm6_parser_emit_binop(sm6, record, ins, dst); break; diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index f72c867b6..84979f0cc 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -755,6 +755,12 @@ enum vkd3d_shader_uav_flags VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, };
+enum vkd3d_shader_atomic_rmw_flags +{ + VKD3DARF_SEQ_CST = 0x1, + VKD3DARF_VOLATILE = 0x2, +}; + enum vkd3d_tessellator_domain { VKD3D_TESSELLATOR_DOMAIN_LINE = 1, diff --git a/tests/hlsl/tgsm.shader_test b/tests/hlsl/tgsm.shader_test index a1b7259e9..e6b1a6858 100644 --- a/tests/hlsl/tgsm.shader_test +++ b/tests/hlsl/tgsm.shader_test @@ -24,7 +24,7 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) }
[test] -todo dispatch 4 1 1 +todo(sm<6) dispatch 4 1 1 probe uav 1 (0) rui (0) probe uav 1 (1) rui (33) probe uav 1 (2) rui (66) @@ -54,7 +54,7 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) }
[test] -todo dispatch 4 1 1 +todo(sm<6) dispatch 4 1 1 probe uav 1 (0) ri (0) probe uav 1 (1) ri (-31) probe uav 1 (2) ri (-62)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index f2c40e921..2bf6fee50 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -9248,6 +9248,9 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil
val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type);
+ if (instruction->flags & VKD3DARF_VOLATILE) + WARN("Ignoring 'volatile' attribute.\n"); + operands[i++] = pointer_id; operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone);
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 2bf6fee50..4095dac51 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -9157,6 +9157,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil const struct vkd3d_shader_dst_param *resource; uint32_t coordinate_id, sample_id, pointer_id; struct vkd3d_shader_register_info reg_info; + SpvMemorySemanticsMask memory_semantic; struct vkd3d_shader_image image; unsigned int structure_stride; uint32_t coordinate_mask; @@ -9251,12 +9252,16 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil if (instruction->flags & VKD3DARF_VOLATILE) WARN("Ignoring 'volatile' attribute.\n");
+ memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) + ? SpvMemorySemanticsSequentiallyConsistentMask + : SpvMemorySemanticsMaskNone; + operands[i++] = pointer_id; operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); - operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); + operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); if (instruction->src_count >= 3) { - operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); + operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); operands[i++] = spirv_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, component_type); } operands[i++] = val_id;
On Tue Mar 19 16:03:54 2024 +0000, Giovanni Mascellani wrote:
Do you plan to add them shortly? Sequential consistency should be pretty easy: you just have to add the appropriate flag. Volatile seems a bit more complicated: it requires the Vulkan memory model, which in turn is incompatible with sequential consistency. So I don't know what the solution for this is. I wonder whether volatile accesses are used often in practice.
It was in the dev branch, so I added it here. This MR is fairly big but this addition is not complex.
On Tue Mar 19 16:03:52 2024 +0000, Giovanni Mascellani wrote:
I see there are a few already, but I find this type of error message a bit confusing. What does it mean that an int32 pointer operand is not an int32 pointer? Maybe it should be something like "Expected an int32 pointer, got something else"?
I haven't changed this yet since it would be inconsistent with previous messages, which raises the question: should the others be changed in another MR?