Goes atop !681. The last 8 commits belong here.
-- v7: vkd3d-shader/spirv: Emit a warning if atomic RMW flags are unhandled. vkd3d-shader/dxil: Implement the DXIL ATOMICRMW instruction.
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 11 ++++++----- libs/vkd3d-shader/tpf.c | 2 ++ libs/vkd3d-shader/vkd3d_shader_private.h | 2 ++ 3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 56c89f762..54f453346 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -6435,9 +6435,9 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp }
static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) + const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride, bool zero_init) { - uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; + uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const SpvStorageClass storage_class = SpvStorageClassWorkgroup; struct vkd3d_symbol reg_symbol; @@ -6447,8 +6447,9 @@ static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id);
pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); + init_id = zero_init ? vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0; var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, - pointer_type_id, storage_class, 0); + pointer_type_id, storage_class, init_id);
spirv_compiler_emit_register_debug_name(builder, var_id, reg);
@@ -6464,7 +6465,7 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, { const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, - tgsm_raw->byte_count / 4, 0); + tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); }
static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, @@ -6473,7 +6474,7 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; unsigned int stride = tgsm_structured->byte_stride / 4; spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, - tgsm_structured->structure_count * stride, stride); + tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); }
static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index bd558693b..5c6c8e422 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -1263,6 +1263,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u ins->declaration.tgsm_raw.byte_count = *tokens; if (ins->declaration.tgsm_raw.byte_count % 4) FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); + ins->declaration.tgsm_raw.zero_init = false; }
static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1274,6 +1275,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction ins->declaration.tgsm_structured.structure_count = *tokens; if (ins->declaration.tgsm_structured.byte_stride % 4) FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); + ins->declaration.tgsm_structured.zero_init = false; }
static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index ea52f151d..89e8aa1d6 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1088,6 +1088,7 @@ struct vkd3d_shader_tgsm_raw { struct vkd3d_shader_dst_param reg; unsigned int byte_count; + bool zero_init; };
struct vkd3d_shader_tgsm_structured @@ -1095,6 +1096,7 @@ struct vkd3d_shader_tgsm_structured struct vkd3d_shader_dst_param reg; unsigned int byte_stride; unsigned int structure_count; + bool zero_init; };
struct vkd3d_shader_thread_group_size
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 125 ++++++++++++++++++++--- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 2 files changed, 111 insertions(+), 15 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index de51588b5..905c26323 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -755,6 +755,7 @@ struct sm6_parser
unsigned int indexable_temp_count; unsigned int icb_count; + unsigned int tgsm_count;
struct sm6_value *values; size_t value_count; @@ -2267,6 +2268,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); }
+static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) +{ + vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); + reg->u.immconst_u32[0] = value; +} + static void dst_param_init(struct vkd3d_shader_dst_param *param) { param->write_mask = VKD3DSP_WRITEMASK_0; @@ -2330,6 +2337,12 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, param->reg = *reg; }
+static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value) +{ + src_param_init(param); + register_make_constant_uint(&param->reg, value); +} + static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, struct sm6_parser *sm6) { @@ -3009,6 +3022,30 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); }
+static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, + unsigned int alignment, unsigned int init, struct sm6_value *dst) +{ + enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); + struct vkd3d_shader_instruction *ins; + unsigned int byte_count; + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); + dst_param_init(&ins->declaration.tgsm_raw.reg); + register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); + dst->u.reg = ins->declaration.tgsm_raw.reg.reg; + ins->declaration.tgsm_raw.alignment = alignment; + byte_count = elem_type->u.width / 8u; + if (byte_count != 4) + { + FIXME("Unsupported byte count %u.\n", byte_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Raw TGSM byte count %u is not supported.", byte_count); + } + ins->declaration.tgsm_raw.byte_count = byte_count; + /* The initialiser value index will be resolved later when forward references can be handled. 
*/ + ins->flags = init; +} + static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) { const struct sm6_type *type, *scalar_type; @@ -3134,10 +3171,22 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ } else if (address_space == ADDRESS_SPACE_GROUPSHARED) { - FIXME("Unsupported TGSM.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "TGSM global variables are not supported."); - return false; + if (!sm6_type_is_numeric(scalar_type)) + { + WARN("Unsupported type class %u.\n", scalar_type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "TGSM variables of type class %u are not supported.", scalar_type->class); + return false; + } + if (count != 1) + { + FIXME("Unsupported structured TGSM.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Structured TGSM global variables are not supported."); + return false; + } + + sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); } else { @@ -3173,6 +3222,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init return NULL; }
+static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) +{ + const struct sm6_value *value; + + if (!index) + return false; + + --index; + if (!(value = sm6_parser_get_value_safe(sm6, index)) + || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) + { + WARN("Invalid initialiser index %zu.\n", index); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "TGSM initialiser value index %zu is invalid.", index); + return false; + } + else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value)) + { + return true; + } + else if (sm6_value_is_undef(value)) + { + /* In VSIR, initialisation with undefined values of objects is implied, not explicit. */ + return false; + } + + FIXME("Non-zero initialisers are not supported.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Non-zero TGSM initialisers are not supported."); + return false; +} + static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { size_t i, count, base_value_idx = sm6->value_count; @@ -3246,6 +3327,11 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); } + else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) + { + ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } } for (i = base_value_idx; i < sm6->value_count; ++i) { @@ -5549,8 +5635,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { const struct sm6_type *elem_type = NULL, *pointee_type; - struct vkd3d_shader_src_param *src_param; - unsigned int alignment, i = 0; + unsigned int alignment, operand_count, i = 0; + struct vkd3d_shader_src_param *src_params; const struct sm6_value *ptr; uint64_t alignment_code;
@@ -5587,12 +5673,15 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV);
- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) return; - src_param_init_from_value(&src_param[0], ptr); - src_param->reg.alignment = alignment; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], ptr); + src_params[operand_count - 1].reg.alignment = alignment;
instruction_dst_param_init_ssa_scalar(ins, sm6); } @@ -5710,11 +5799,11 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { - struct vkd3d_shader_src_param *src_param; + unsigned int i = 0, alignment, operand_count; + struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_dst_param *dst_param; const struct sm6_type *pointee_type; const struct sm6_value *ptr, *src; - unsigned int i = 0, alignment; uint64_t alignment_code;
if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) @@ -5747,16 +5836,22 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV);
- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) return; - src_param_init_from_value(&src_param[0], src); + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], src);
dst_param = instruction_dst_params_alloc(ins, 1, sm6); dst_param_init(dst_param); dst_param->reg = ptr->u.reg; dst_param->reg.alignment = alignment; + /* Groupshared stores contain the address in the src params. */ + if (dst_param->reg.type != VKD3DSPR_IDXTEMP) + dst_param->reg.idx_count = 1; }
static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 89e8aa1d6..f533d167d 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1087,6 +1087,7 @@ struct vkd3d_shader_tgsm struct vkd3d_shader_tgsm_raw { struct vkd3d_shader_dst_param reg; + unsigned int alignment; unsigned int byte_count; bool zero_init; };
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 115 ++++++++++++++++++----- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 2 files changed, 92 insertions(+), 24 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 905c26323..09b0b0d9b 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -543,6 +543,7 @@ struct sm6_value { const struct sm6_type *type; enum sm6_value_type value_type; + unsigned int structure_stride; bool is_undefined; union { @@ -3033,6 +3034,7 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6 dst_param_init(&ins->declaration.tgsm_raw.reg); register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); dst->u.reg = ins->declaration.tgsm_raw.reg.reg; + dst->structure_stride = 0; ins->declaration.tgsm_raw.alignment = alignment; byte_count = elem_type->u.width / 8u; if (byte_count != 4) @@ -3046,6 +3048,33 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6 ins->flags = init; }
+static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type, + unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst) +{ + enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); + struct vkd3d_shader_instruction *ins; + unsigned int structure_stride; + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED); + dst_param_init(&ins->declaration.tgsm_structured.reg); + register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, + data_type, sm6->tgsm_count++); + dst->u.reg = ins->declaration.tgsm_structured.reg.reg; + structure_stride = elem_type->u.width / 8u; + if (structure_stride != 4) + { + FIXME("Unsupported structure stride %u.\n", structure_stride); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Structured TGSM byte stride %u is not supported.", structure_stride); + } + dst->structure_stride = structure_stride; + ins->declaration.tgsm_structured.alignment = alignment; + ins->declaration.tgsm_structured.byte_stride = structure_stride; + ins->declaration.tgsm_structured.structure_count = count; + /* The initialiser value index will be resolved later when forward references can be handled. 
*/ + ins->flags = init; +} + static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) { const struct sm6_type *type, *scalar_type; @@ -3178,15 +3207,10 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ "TGSM variables of type class %u are not supported.", scalar_type->class); return false; } - if (count != 1) - { - FIXME("Unsupported structured TGSM.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "Structured TGSM global variables are not supported."); - return false; - } - - sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); + if (count == 1) + sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); + else + sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst); } else { @@ -3332,6 +3356,11 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ins->flags = 0; } + else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) + { + ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } } for (i = base_value_idx; i < sm6->value_count; ++i) { @@ -5627,6 +5656,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record register_index_address_init(&reg->idx[1], elem_value, sm6); reg->idx[1].is_in_bounds = is_in_bounds; reg->idx_count = 2; + dst->structure_stride = src->structure_stride;
ins->handler_idx = VKD3DSIH_NOP; } @@ -5673,15 +5703,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); - vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV); + if (ptr->structure_stride) + { + assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); + + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + if (ptr->u.reg.idx[1].rel_addr) + src_params[0] = *ptr->u.reg.idx[1].rel_addr; + else + src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + /* Struct offset is always zero as there is no struct, just an array. */ + src_param_make_constant_uint(&src_params[1], 0); + src_param_init_from_value(&src_params[2], ptr); + src_params[2].reg.alignment = alignment; + } + else + { + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV);
- if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) - return; - if (operand_count > 1) - src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[operand_count - 1], ptr); - src_params[operand_count - 1].reg.alignment = alignment; + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) + return; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], ptr); + src_params[operand_count - 1].reg.alignment = alignment; + }
instruction_dst_param_init_ssa_scalar(ins, sm6); } @@ -5836,14 +5885,32 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); - vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); + if (ptr->structure_stride) + { + assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED);
- if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) - return; - if (operand_count > 1) - src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[operand_count - 1], src); + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + if (ptr->u.reg.idx[1].rel_addr) + src_params[0] = *ptr->u.reg.idx[1].rel_addr; + else + src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + /* Struct offset is always zero as there is no struct, just an array. */ + src_param_make_constant_uint(&src_params[1], 0); + src_param_init_from_value(&src_params[2], src); + } + else + { + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); + + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) + return; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], src); + }
dst_param = instruction_dst_params_alloc(ins, 1, sm6); dst_param_init(dst_param); diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index f533d167d..6800547ac 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1095,6 +1095,7 @@ struct vkd3d_shader_tgsm_raw struct vkd3d_shader_tgsm_structured { struct vkd3d_shader_dst_param reg; + unsigned int alignment; unsigned int byte_stride; unsigned int structure_count; bool zero_init;
From: Conor McCarthy cmccarthy@codeweavers.com
This would cause a lot of warning spam if it were a warning. --- libs/vkd3d-shader/spirv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 54f453346..7bcfd2f1b 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -6435,13 +6435,18 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp }
static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride, bool zero_init) + const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, + unsigned int structure_stride, bool zero_init) { uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const SpvStorageClass storage_class = SpvStorageClassWorkgroup; struct vkd3d_symbol reg_symbol;
+ /* Alignment is supported only in the Kernel execution model. */ + if (alignment) + TRACE("Ignoring alignment %u.\n", alignment); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); length_id = spirv_compiler_get_constant_uint(compiler, size); array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); @@ -6464,7 +6469,7 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; - spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, + spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment, tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); }
@@ -6473,7 +6478,7 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi { const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; unsigned int stride = tgsm_structured->byte_stride / 4; - spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, + spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment, tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 09b0b0d9b..b3e4f0534 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -393,6 +393,7 @@ enum dx_intrinsic_opcode DX_TEXTURE_GATHER_CMP = 74, DX_ATOMIC_BINOP = 78, DX_ATOMIC_CMP_XCHG = 79, + DX_BARRIER = 80, DX_DERIV_COARSEX = 83, DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, @@ -465,6 +466,14 @@ enum dxil_atomic_binop_code ATOMIC_BINOP_INVALID, };
+enum dxil_sync_flags +{ + SYNC_THREAD_GROUP = 0x1, + SYNC_GLOBAL_UAV = 0x2, + SYNC_THREAD_GROUP_UAV = 0x4, + SYNC_GROUP_SHARED_MEMORY = 0x8, +}; + struct sm6_pointer_info { const struct sm6_type *type; @@ -4104,6 +4113,27 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr dst->u.reg = dst_params[0].reg; }
+static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + enum dxil_sync_flags flags; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC); + flags = sm6_value_get_constant_uint(operands[0]); + ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV); + if (flags & SYNC_GLOBAL_UAV) + ins->flags |= VKD3DSSF_GLOBAL_UAV; + if (flags & SYNC_GROUP_SHARED_MEMORY) + ins->flags |= VKD3DSSF_GROUP_SHARED_MEMORY; + if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)) + { + FIXME("Unhandled flags %#x.\n", flags); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Barrier flags %#x are unhandled.", flags); + } +} + static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4933,6 +4963,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, + [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier}, [DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary}, [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store},
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 7bcfd2f1b..de92d9229 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -8766,7 +8766,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } - assert(dst->reg.data_type == VKD3D_DATA_UINT); spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index de92d9229..38f42d886 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -8887,7 +8887,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0);
data = &src[instruction->src_count - 1]; - assert(data->reg.data_type == VKD3D_DATA_UINT); val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask);
component_count = vsir_write_mask_component_count(dst->write_mask);
From: Conor McCarthy cmccarthy@codeweavers.com
For 64-bit indexable temps (and any other 64-bit declarations) the write mask must not be converted. --- libs/vkd3d-shader/spirv.c | 10 +++++----- libs/vkd3d-shader/vkd3d_shader_private.h | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 38f42d886..5d9a6bb11 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4094,7 +4094,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, struct vkd3d_shader_register_info reg_info; unsigned int component_count; uint32_t type_id, val_id; - uint32_t write_mask32; + uint32_t val_write_mask;
if (reg->type == VKD3DSPR_IMMCONST) return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); @@ -4114,17 +4114,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); return vkd3d_spirv_get_op_undef(builder, type_id); } - assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); spirv_compiler_emit_dereference_register(compiler, reg, &reg_info);
- write_mask32 = data_type_is_64_bit(reg->data_type) ? vsir_write_mask_32_from_64(write_mask) : write_mask; + val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) + ? vsir_write_mask_32_from_64(write_mask) : write_mask;
/* Intermediate value (no storage class). */ if (reg_info.storage_class == SpvStorageClassMax) { val_id = reg_info.id; } - else if (vsir_write_mask_component_count(write_mask32) == 1) + else if (vsir_write_mask_component_count(val_write_mask) == 1) { return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, &reg_info); } @@ -4137,7 +4137,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; val_id = spirv_compiler_emit_swizzle(compiler, - val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); + val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask);
if (component_type != reg_info.component_type) { diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 6800547ac..05f76c5d1 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1604,6 +1604,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc } }
+static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) +{ + return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index);
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index b3e4f0534..4794a399f 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -2949,6 +2949,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
default: FIXME("Unhandled constant code %u.\n", record->code); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Constant code %u is unhandled.", record->code); dst->u.reg.type = VKD3DSPR_UNDEF; break; }
From: Conor McCarthy cmccarthy@codeweavers.com
--- Makefile.am | 1 + tests/hlsl/tgsm.shader_test | 135 ++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 tests/hlsl/tgsm.shader_test
diff --git a/Makefile.am b/Makefile.am index d07218d88..dbc0adb60 100644 --- a/Makefile.am +++ b/Makefile.am @@ -199,6 +199,7 @@ vkd3d_shader_tests = \ tests/hlsl/texture-load-typed.shader_test \ tests/hlsl/texture-load.shader_test \ tests/hlsl/texture-ordering.shader_test \ + tests/hlsl/tgsm.shader_test \ tests/hlsl/transpose.shader_test \ tests/hlsl/trigonometry.shader_test \ tests/hlsl/trunc.shader_test \ diff --git a/tests/hlsl/tgsm.shader_test b/tests/hlsl/tgsm.shader_test new file mode 100644 index 000000000..a1b7259e9 --- /dev/null +++ b/tests/hlsl/tgsm.shader_test @@ -0,0 +1,135 @@ +[require] +shader model >= 5.0 + +[uav 1] +format r32 uint +size (buffer, 4) + +1 0 0 0 + +[compute shader todo] +RWByteAddressBuffer u : register(u1); +groupshared uint m; + + [numthreads(32, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + if (!local_idx) + m = group_id.x; + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m, group_id.x); + GroupMemoryBarrierWithGroupSync(); + if (!local_idx) + u.Store(4 * group_id.x, m); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (0) +probe uav 1 (1) rui (33) +probe uav 1 (2) rui (66) +probe uav 1 (3) rui (99) + + +[uav 1] +format r32 sint +size (buffer, 4) + +1 0 0 0 + +[compute shader todo] +RWByteAddressBuffer u : register(u1); +groupshared int m; + + [numthreads(32, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + if (!local_idx) + m = group_id.x; + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m, -group_id.x); + GroupMemoryBarrierWithGroupSync(); + if (!local_idx) + u.Store(4 * group_id.x, m); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) ri (0) +probe uav 1 (1) ri (-31) +probe uav 1 (2) ri (-62) +probe uav 1 (3) ri (-93) + + +[uav 1] +format r32 float +size (buffer, 8) + +1 1 1 1 0 0 0 0 + +[uav 2] +format r32 sint +size (buffer, 8) + +1 1 1 1 0 0 0 0 + +[compute shader todo] +uniform uint idx; + +#define GROUP_SIZE 4 + +struct data +{ + float 
f; + uint u; +}; + +RWBuffer<float> u : register(u1); +RWBuffer<uint> u2 : register(u2); +groupshared data m[GROUP_SIZE]; + + [numthreads(GROUP_SIZE, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID, + uint thread_id : SV_DispatchThreadID) +{ + uint i; + if (!local_idx) + { + for (i = 0; i < GROUP_SIZE; ++i) + { + m[i].f = group_id.x; + m[i].u = group_id.x; + } + } + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m[0].u, 2); + InterlockedAdd(m[idx].u, 1); + GroupMemoryBarrierWithGroupSync(); + for (i = 0; i < local_idx; ++i) + { + m[local_idx].f += group_id.x; + m[local_idx].u += group_id.x; + } + u[thread_id.x] = m[local_idx].f; + u2[thread_id.x] = m[local_idx].u; +} + +[test] +uniform 0 uint 1 +todo dispatch 2 1 1 +probe uav 1 (0) r (0.0) +probe uav 1 (1) r (0.0) +probe uav 1 (2) r (0.0) +probe uav 1 (3) r (0.0) +probe uav 1 (4) r (1.0) +probe uav 1 (5) r (2.0) +probe uav 1 (6) r (3.0) +probe uav 1 (7) r (4.0) +probe uav 2 (0) ri (8) +probe uav 2 (1) ri (4) +probe uav 2 (2) ri (0) +probe uav 2 (3) ri (0) +probe uav 2 (4) ri (9) +probe uav 2 (5) ri (6) +probe uav 2 (6) ri (3) +probe uav 2 (7) ri (4)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 69 +++++++++++++++++++++++++++++++ tests/hlsl/numthreads.shader_test | 10 +++-- 2 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 4794a399f..3d540d269 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -398,6 +398,10 @@ enum dx_intrinsic_opcode DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, DX_DERIV_FINEY = 86, + DX_THREAD_ID = 93, + DX_GROUP_ID = 94, + DX_THREAD_ID_IN_GROUP = 95, + DX_FLATTENED_THREAD_ID_IN_GROUP = 96, DX_SPLIT_DOUBLE = 102, DX_LEGACY_F32TOF16 = 130, DX_LEGACY_F16TOF32 = 131, @@ -751,6 +755,7 @@ struct sm6_parser
struct vkd3d_shader_dst_param *output_params; struct vkd3d_shader_dst_param *input_params; + uint64_t input_reg_declarations;
struct sm6_function *functions; size_t function_count; @@ -4227,6 +4232,66 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int ins->handler_idx = VKD3DSIH_NOP; }
+static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, + enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int component_count) +{ + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + uint64_t reg_flag; + + reg_flag = 1ull << reg_type; + if (!(sm6->input_reg_declarations & reg_flag)) + { + sm6->input_reg_declarations |= reg_flag; + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_INPUT); + dst_param = &ins->declaration.dst; + vsir_register_init(&dst_param->reg, reg_type, data_type, 0); + dst_param_init_vector(dst_param, component_count); + } +} + +static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + unsigned int component_count = 3, component_idx = 0; + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum vkd3d_shader_register_type reg_type; + + switch (op) + { + case DX_THREAD_ID: + reg_type = VKD3DSPR_THREADID; + break; + case DX_GROUP_ID: + reg_type = VKD3DSPR_THREADGROUPID; + break; + case DX_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADID; + break; + case DX_FLATTENED_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADINDEX; + component_count = 1; + break; + default: + vkd3d_unreachable(); + } + + sm6_parser_dcl_register_builtin(sm6, reg_type, VKD3D_DATA_UINT, component_count); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + vsir_register_init(&src_param->reg, reg_type, VKD3D_DATA_UINT, 0); + if (component_count > 1) + { + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + component_idx = sm6_value_get_constant_uint(operands[0]); + } + src_param_init_scalar(src_param, component_idx); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, 
enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4981,10 +5046,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_FLATTENED_THREAD_ID_IN_GROUP ] = {"i", "", sm6_parser_emit_dx_compute_builtin}, [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FRC ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_GET_DIMENSIONS ] = {"D", "Hi", sm6_parser_emit_dx_get_dimensions}, + [DX_GROUP_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_IBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -5020,6 +5087,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, + [DX_THREAD_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, + [DX_THREAD_ID_IN_GROUP ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, diff --git a/tests/hlsl/numthreads.shader_test b/tests/hlsl/numthreads.shader_test index e8f7900bf..ceeb40e3e 100644 --- a/tests/hlsl/numthreads.shader_test +++ b/tests/hlsl/numthreads.shader_test @@ -172,7 +172,7 @@ size (2d, 2, 2) 1.0 1.0
[compute shader] -/* Attributes are taken from the first function, and dropped from the second. */ +/* In SM < 6.0, attributes are taken from the first function, and dropped from the second. */ RWTexture2D<float> u;
[numthreads(2, 1, 1)] @@ -185,8 +185,10 @@ void main(uint2 id : sv_dispatchthreadid) }
[test] -todo(sm>=6) dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) r (2.0) -probe uav 0 (0, 1) r (1.0) -probe uav 0 (1, 0) r (2.0) +if(sm<6) probe uav 0 (0, 1) r (1.0) +if(sm<6) probe uav 0 (1, 0) r (2.0) probe uav 0 (1, 1) r (1.0) +if(sm>=6) probe uav 0 (0, 1) r (2.0) +if(sm>=6) probe uav 0 (1, 0) r (1.0)
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d-shader/d3d_asm.c | 30 ++++ libs/vkd3d-shader/dxil.c | 184 +++++++++++++++++++++++ libs/vkd3d-shader/vkd3d_shader_private.h | 6 + tests/hlsl/tgsm.shader_test | 20 +-- tests/shader_runner.c | 20 ++- tests/shader_runner.h | 1 + 6 files changed, 244 insertions(+), 17 deletions(-)
diff --git a/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d-shader/d3d_asm.c index 7b30d2600..50d545673 100644 --- a/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d-shader/d3d_asm.c @@ -448,6 +448,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); }
+static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) +{ + if (atomic_flags & VKD3DARF_SEQ_CST) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_seq_cst"); + atomic_flags &= ~VKD3DARF_SEQ_CST; + } + if (atomic_flags & VKD3DARF_VOLATILE) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); + atomic_flags &= ~VKD3DARF_VOLATILE; + } + + if (atomic_flags) + vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags); +} + static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) { if (sync_flags & VKD3DSSF_GLOBAL_UAV) @@ -1712,6 +1729,19 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile } break;
+ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: + case VKD3DSIH_IMM_ATOMIC_IADD: + case VKD3DSIH_IMM_ATOMIC_AND: + case VKD3DSIH_IMM_ATOMIC_IMAX: + case VKD3DSIH_IMM_ATOMIC_IMIN: + case VKD3DSIH_IMM_ATOMIC_OR: + case VKD3DSIH_IMM_ATOMIC_UMAX: + case VKD3DSIH_IMM_ATOMIC_UMIN: + case VKD3DSIH_IMM_ATOMIC_EXCH: + case VKD3DSIH_IMM_ATOMIC_XOR: + shader_dump_atomic_op_flags(compiler, ins->flags); + break; + case VKD3DSIH_SYNC: shader_dump_sync_flags(compiler, ins->flags); break; diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 3d540d269..96bd3c111 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -456,6 +456,32 @@ enum dxil_predicate ICMP_SLE = 41, };
+enum dxil_rmw_code +{ + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10, +}; + +enum dxil_atomic_ordering +{ + ORDERING_NOTATOMIC = 0, + ORDERING_UNORDERED = 1, + ORDERING_MONOTONIC = 2, + ORDERING_ACQUIRE = 3, + ORDERING_RELEASE = 4, + ORDERING_ACQREL = 5, + ORDERING_SEQCST = 6, +}; + enum dxil_atomic_binop_code { ATOMIC_BINOP_ADD, @@ -2582,6 +2608,30 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 return true; }
+static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) + { + WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer."); + return false; + } + return true; +} + +static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_i32(value->type)) + { + WARN("Operand result type %u is not i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 operand passed to a DXIL instruction is not an int32."); + return false; + } + return true; +} + static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) { if (idx < sm6->value_count) @@ -3496,6 +3546,9 @@ struct function_emission_state unsigned int temp_idx; };
+static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, + unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg); + static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { @@ -3571,6 +3624,130 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); }
+static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) +{ + switch (code) + { + case RMW_ADD: + return VKD3DSIH_IMM_ATOMIC_IADD; + case RMW_AND: + return VKD3DSIH_IMM_ATOMIC_AND; + case RMW_MAX: + return VKD3DSIH_IMM_ATOMIC_IMAX; + case RMW_MIN: + return VKD3DSIH_IMM_ATOMIC_IMIN; + case RMW_OR: + return VKD3DSIH_IMM_ATOMIC_OR; + case RMW_UMAX: + return VKD3DSIH_IMM_ATOMIC_UMAX; + case RMW_UMIN: + return VKD3DSIH_IMM_ATOMIC_UMIN; + case RMW_XCHG: + return VKD3DSIH_IMM_ATOMIC_EXCH; + case RMW_XOR: + return VKD3DSIH_IMM_ATOMIC_XOR; + default: + /* DXIL currently doesn't use SUB and NAND. */ + return VKD3DSIH_INVALID; + } +} + +static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, + struct function_emission_state *state, struct sm6_value *dst) +{ + struct vkd3d_shader_register coord, const_offset, const_zero; + const struct vkd3d_shader_register *regs[2]; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + const struct sm6_value *ptr, *src; + enum vkd3d_shader_opcode op; + unsigned int i = 0; + bool is_volatile; + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) + || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) + return; + + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) + { + WARN("Register is not groupshared.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The destination register for an atomicrmw instruction is not groupshared memory."); + return; + } + + dst->type = ptr->type->u.pointer.type; + + if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i)) + || !sm6_value_validate_is_i32(src, sm6)) + return; + + if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6)) + return; + + if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID) + { + FIXME("Unhandled atomicrmw op %"PRIu64".\n", code); + 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code); + return; + } + + is_volatile = record->operands[i++]; + + /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ + if ((code = record->operands[i++]) != ORDERING_SEQCST) + FIXME("Unhandled atomic ordering %"PRIu64".\n", code); + + if ((code = record->operands[i]) != 1) + WARN("Ignoring synchronisation scope %"PRIu64".\n", code); + + if (ptr->structure_stride) + { + if (ptr->u.reg.idx[1].rel_addr) + { + regs[0] = &ptr->u.reg.idx[1].rel_addr->reg; + } + else + { + register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset); + regs[0] = &const_offset; + } + register_make_constant_uint(&const_zero, 0); + regs[1] = &const_zero; + if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) + return; + } + + ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, op); + ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; + + if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) + return; + if (ptr->structure_stride) + src_param_init_vector_from_reg(&src_params[0], &coord); + else + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[1], src); + + dst_params = instruction_dst_params_alloc(ins, 2, sm6); + register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); + dst_param_init(&dst_params[0]); + + dst_params[1].reg = ptr->u.reg; + /* The groupshared register has data type UAV when accessed. 
*/ + dst_params[1].reg.data_type = VKD3D_DATA_UAV; + dst_params[1].reg.idx[1].rel_addr = NULL; + dst_params[1].reg.idx[1].offset = ~0u; + dst_params[1].reg.idx_count = 1; + dst_param_init(&dst_params[1]); + + dst->u.reg = dst_params[0].reg; +} + static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, const struct sm6_type *type_b, struct sm6_parser *sm6) { @@ -6561,6 +6738,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const case FUNC_CODE_INST_ALLOCA: sm6_parser_emit_alloca(sm6, record, ins, dst); break; + case FUNC_CODE_INST_ATOMICRMW: + { + struct function_emission_state state = {code_block, ins}; + sm6_parser_emit_atomicrmw(sm6, record, &state, dst); + sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); + break; + } case FUNC_CODE_INST_BINOP: sm6_parser_emit_binop(sm6, record, ins, dst); break; diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 05f76c5d1..8bbd224a9 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -754,6 +754,12 @@ enum vkd3d_shader_uav_flags VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, };
+enum vkd3d_shader_atomic_rmw_flags +{ + VKD3DARF_SEQ_CST = 0x1, + VKD3DARF_VOLATILE = 0x2, +}; + enum vkd3d_tessellator_domain { VKD3D_TESSELLATOR_DOMAIN_LINE = 1, diff --git a/tests/hlsl/tgsm.shader_test b/tests/hlsl/tgsm.shader_test index a1b7259e9..08183fe9c 100644 --- a/tests/hlsl/tgsm.shader_test +++ b/tests/hlsl/tgsm.shader_test @@ -24,11 +24,11 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) }
[test] -todo dispatch 4 1 1 -probe uav 1 (0) rui (0) -probe uav 1 (1) rui (33) -probe uav 1 (2) rui (66) -probe uav 1 (3) rui (99) +todo(sm<6) dispatch 4 1 1 +bug_if(sm>=6) probe uav 1 (0) rui (0) +bug_if(sm>=6) probe uav 1 (1) rui (33) +bug_if(sm>=6) probe uav 1 (2) rui (66) +bug_if(sm>=6) probe uav 1 (3) rui (99)
[uav 1] @@ -54,11 +54,11 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) }
[test] -todo dispatch 4 1 1 -probe uav 1 (0) ri (0) -probe uav 1 (1) ri (-31) -probe uav 1 (2) ri (-62) -probe uav 1 (3) ri (-93) +todo(sm<6) dispatch 4 1 1 +bug_if(sm>=6) probe uav 1 (0) ri (0) +bug_if(sm>=6) probe uav 1 (1) ri (-31) +bug_if(sm>=6) probe uav 1 (2) ri (-62) +bug_if(sm>=6) probe uav 1 (3) ri (-93)
[uav 1] diff --git a/tests/shader_runner.c b/tests/shader_runner.c index ccdda5b78..cfe695000 100644 --- a/tests/shader_runner.c +++ b/tests/shader_runner.c @@ -729,6 +729,7 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) int ret;
runner->is_todo = false; + runner->is_bug = false;
while (match) { @@ -739,6 +740,11 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) runner->is_todo = true; match = true; } + if (match_string_with_args(line, "bug_if", &line, runner->minimum_shader_model)) + { + runner->is_bug = true; + match = true; + }
line_ini = line; if (match_string_with_args(line, "if", &line, runner->minimum_shader_model)) @@ -945,7 +951,7 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) ++line; read_uint4(&line, &v, false); line = close_parentheses(line); - todo_if(runner->is_todo) check_readback_data_uvec4(rb, &rect, &v); + todo_if(runner->is_todo) bug_if(runner->is_bug) check_readback_data_uvec4(rb, &rect, &v); } else if (match_string(line, "rgbai", &line)) { @@ -956,7 +962,7 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) ++line; read_int4(&line, &v, false); line = close_parentheses(line); - todo_if(runner->is_todo) check_readback_data_ivec4(rb, &rect, &v); + todo_if(runner->is_todo) bug_if(runner->is_bug) check_readback_data_ivec4(rb, &rect, &v); } else if (match_string(line, "rgba", &line)) { @@ -967,7 +973,7 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) fatal_error("Malformed probe arguments '%s'.\n", line); if (ret < 5) ulps = 0; - todo_if(runner->is_todo) check_readback_data_vec4(rb, &rect, &v, ulps); + todo_if(runner->is_todo) bug_if(runner->is_bug) check_readback_data_vec4(rb, &rect, &v, ulps); } else if (match_string(line, "rui", &line) || (is_signed = match_string(line, "ri", &line))) { @@ -988,7 +994,7 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) else read_uint(&line, &expect, false); line = close_parentheses(line); - todo_if(runner->is_todo) check_readback_data_uint(rb, &box, expect, 0); + todo_if(runner->is_todo) bug_if(runner->is_bug) check_readback_data_uint(rb, &box, expect, 0); } else if (match_string(line, "rui64", &line) || (is_signed = match_string(line, "ri64", &line))) { @@ -1009,7 +1015,7 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) else read_uint64(&line, &expect, false); line = close_parentheses(line); - todo_if(runner->is_todo) check_readback_data_uint64(rb, &box, expect, 0); + 
todo_if(runner->is_todo) bug_if(runner->is_bug) check_readback_data_uint64(rb, &box, expect, 0); } else if (match_string(line, "rd", &line)) { @@ -1020,7 +1026,7 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) fatal_error("Malformed probe arguments '%s'.\n", line); if (ret < 2) ulps = 0; - todo_if(runner->is_todo) check_readback_data_double(rb, &rect, expect, ulps); + todo_if(runner->is_todo) bug_if(runner->is_bug) check_readback_data_double(rb, &rect, expect, ulps); } else if (match_string(line, "r", &line)) { @@ -1031,7 +1037,7 @@ static void parse_test_directive(struct shader_runner *runner, const char *line) fatal_error("Malformed probe arguments '%s'.\n", line); if (ret < 2) ulps = 0; - todo_if(runner->is_todo) check_readback_data_float(rb, &rect, expect, ulps); + todo_if(runner->is_todo) bug_if(runner->is_bug) check_readback_data_float(rb, &rect, expect, ulps); } else { diff --git a/tests/shader_runner.h b/tests/shader_runner.h index 5666b2d59..cf403d9f1 100644 --- a/tests/shader_runner.h +++ b/tests/shader_runner.h @@ -148,6 +148,7 @@ struct shader_runner const struct shader_runner_ops *ops;
bool is_todo; + bool is_bug;
char *vs_source; char *ps_source;
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d-shader/spirv.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 5d9a6bb11..9b56dddfa 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -9229,6 +9229,11 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil
val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type);
+ if (instruction->flags & VKD3DARF_SEQ_CST) + WARN("Ignoring sequentially consistent ordering.\n"); + if (instruction->flags & VKD3DARF_VOLATILE) + WARN("Ignoring 'volatile' attribute.\n"); + operands[i++] = pointer_id; operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone);
The same CI failure occurred in `test_tgsm()` when I dropped the DXIL code in there, so the SM 6 shader was the issue. The CI runs on a 6600 XT and I have a 6750 XT, so results should be the same, except the CI runs an old Mesa version. The failures were most likely caused by mishandling of barriers in the virtualised control flow, resulting in the UAV being written too early, but this is hard to prove.
> The failures were most likely caused by mishandling of barriers in the virtualised control flow, resulting in the UAV being written too early, but this is hard to prove.
Yeah, that totally makes sense. And indeed I could reproduce the bug on my system using Mesa 22.3.6, but with the same build plus !711 cherry-picked, the bug is solved. Thinking about it, I even wonder how that works on recent Mesa; the old structurizer messes up control flow completely. The new one should be much better.
On Wed Mar 13 10:49:09 2024 +0000, Giovanni Mascellani wrote:
> > The failures were most likely caused by mishandling of barriers in the virtualised control flow, resulting in the UAV being written too early, but this is hard to prove.
>
> Yeah, that totally makes sense. And indeed I could reproduce the bug on my system using Mesa 22.3.6, but using the same build and cherry picking !711 the bug is solved. Thinking about it, I even wonder how that works on recent Mesa, the old structurizer messes up with control flow completely. The new one should be much better.
Maybe it makes sense to delay this a little bit to have it go in after !711, so you don't even have to add `bug_if()` support in the shader runner.
> Thinking about it, I even wonder how that works on recent Mesa
Bisecting led me to this behemoth: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17922/diffs?commit.... I guess I'll keep my curiosity.
On Wed Mar 13 11:12:18 2024 +0000, Giovanni Mascellani wrote:
> > Thinking about it, I even wonder how that works on recent Mesa
>
> Bisecting led me to this behemoth: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17922/diffs?commit.... I guess I'll keep my curiosity.
CF is a pain for them too it seems :grin:
Yeah, let's leave it until !711 is in.