Goes atop !681. The last 8 commits belong here.
-- v5: tests: Use DXIL code in test_tgsm().
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 11 ++++++----- libs/vkd3d-shader/tpf.c | 2 ++ libs/vkd3d-shader/vkd3d_shader_private.h | 2 ++ 3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 56c89f762..54f453346 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -6435,9 +6435,9 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp }
static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) + const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride, bool zero_init) { - uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; + uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const SpvStorageClass storage_class = SpvStorageClassWorkgroup; struct vkd3d_symbol reg_symbol; @@ -6447,8 +6447,9 @@ static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id);
pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); + init_id = zero_init ? vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0; var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, - pointer_type_id, storage_class, 0); + pointer_type_id, storage_class, init_id);
spirv_compiler_emit_register_debug_name(builder, var_id, reg);
@@ -6464,7 +6465,7 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, { const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, - tgsm_raw->byte_count / 4, 0); + tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); }
static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, @@ -6473,7 +6474,7 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; unsigned int stride = tgsm_structured->byte_stride / 4; spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, - tgsm_structured->structure_count * stride, stride); + tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); }
static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index bd558693b..5c6c8e422 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -1263,6 +1263,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u ins->declaration.tgsm_raw.byte_count = *tokens; if (ins->declaration.tgsm_raw.byte_count % 4) FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); + ins->declaration.tgsm_raw.zero_init = false; }
static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1274,6 +1275,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction ins->declaration.tgsm_structured.structure_count = *tokens; if (ins->declaration.tgsm_structured.byte_stride % 4) FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); + ins->declaration.tgsm_structured.zero_init = false; }
static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index ea52f151d..89e8aa1d6 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1088,6 +1088,7 @@ struct vkd3d_shader_tgsm_raw { struct vkd3d_shader_dst_param reg; unsigned int byte_count; + bool zero_init; };
struct vkd3d_shader_tgsm_structured @@ -1095,6 +1096,7 @@ struct vkd3d_shader_tgsm_structured struct vkd3d_shader_dst_param reg; unsigned int byte_stride; unsigned int structure_count; + bool zero_init; };
struct vkd3d_shader_thread_group_size
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 125 ++++++++++++++++++++--- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 2 files changed, 111 insertions(+), 15 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index de51588b5..905c26323 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -755,6 +755,7 @@ struct sm6_parser
unsigned int indexable_temp_count; unsigned int icb_count; + unsigned int tgsm_count;
struct sm6_value *values; size_t value_count; @@ -2267,6 +2268,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); }
+static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) +{ + vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); + reg->u.immconst_u32[0] = value; +} + static void dst_param_init(struct vkd3d_shader_dst_param *param) { param->write_mask = VKD3DSP_WRITEMASK_0; @@ -2330,6 +2337,12 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, param->reg = *reg; }
+static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value) +{ + src_param_init(param); + register_make_constant_uint(&param->reg, value); +} + static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, struct sm6_parser *sm6) { @@ -3009,6 +3022,30 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); }
+static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, + unsigned int alignment, unsigned int init, struct sm6_value *dst) +{ + enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); + struct vkd3d_shader_instruction *ins; + unsigned int byte_count; + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); + dst_param_init(&ins->declaration.tgsm_raw.reg); + register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); + dst->u.reg = ins->declaration.tgsm_raw.reg.reg; + ins->declaration.tgsm_raw.alignment = alignment; + byte_count = elem_type->u.width / 8u; + if (byte_count != 4) + { + FIXME("Unsupported byte count %u.\n", byte_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Raw TGSM byte count %u is not supported.", byte_count); + } + ins->declaration.tgsm_raw.byte_count = byte_count; + /* The initialiser value index will be resolved later when forward references can be handled. 
*/ + ins->flags = init; +} + static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) { const struct sm6_type *type, *scalar_type; @@ -3134,10 +3171,22 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ } else if (address_space == ADDRESS_SPACE_GROUPSHARED) { - FIXME("Unsupported TGSM.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "TGSM global variables are not supported."); - return false; + if (!sm6_type_is_numeric(scalar_type)) + { + WARN("Unsupported type class %u.\n", scalar_type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "TGSM variables of type class %u are not supported.", scalar_type->class); + return false; + } + if (count != 1) + { + FIXME("Unsupported structured TGSM.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Structured TGSM global variables are not supported."); + return false; + } + + sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); } else { @@ -3173,6 +3222,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init return NULL; }
+static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) +{ + const struct sm6_value *value; + + if (!index) + return false; + + --index; + if (!(value = sm6_parser_get_value_safe(sm6, index)) + || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) + { + WARN("Invalid initialiser index %zu.\n", index); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "TGSM initialiser value index %zu is invalid.", index); + return false; + } + else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value)) + { + return true; + } + else if (sm6_value_is_undef(value)) + { + /* In VSIR, initialisation with undefined values of objects is implied, not explicit. */ + return false; + } + + FIXME("Non-zero initialisers are not supported.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Non-zero TGSM initialisers are not supported."); + return false; +} + static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { size_t i, count, base_value_idx = sm6->value_count; @@ -3246,6 +3327,11 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); } + else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) + { + ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } } for (i = base_value_idx; i < sm6->value_count; ++i) { @@ -5549,8 +5635,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { const struct sm6_type *elem_type = NULL, *pointee_type; - struct vkd3d_shader_src_param *src_param; - unsigned int alignment, i = 0; + unsigned int alignment, operand_count, i = 0; + struct vkd3d_shader_src_param *src_params; const struct sm6_value *ptr; uint64_t alignment_code;
@@ -5587,12 +5673,15 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV);
- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) return; - src_param_init_from_value(&src_param[0], ptr); - src_param->reg.alignment = alignment; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], ptr); + src_params[operand_count - 1].reg.alignment = alignment;
instruction_dst_param_init_ssa_scalar(ins, sm6); } @@ -5710,11 +5799,11 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { - struct vkd3d_shader_src_param *src_param; + unsigned int i = 0, alignment, operand_count; + struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_dst_param *dst_param; const struct sm6_type *pointee_type; const struct sm6_value *ptr, *src; - unsigned int i = 0, alignment; uint64_t alignment_code;
if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) @@ -5747,16 +5836,22 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV);
- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) return; - src_param_init_from_value(&src_param[0], src); + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], src);
dst_param = instruction_dst_params_alloc(ins, 1, sm6); dst_param_init(dst_param); dst_param->reg = ptr->u.reg; dst_param->reg.alignment = alignment; + /* Groupshared stores contain the address in the src params. */ + if (dst_param->reg.type != VKD3DSPR_IDXTEMP) + dst_param->reg.idx_count = 1; }
static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 89e8aa1d6..f533d167d 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1087,6 +1087,7 @@ struct vkd3d_shader_tgsm struct vkd3d_shader_tgsm_raw { struct vkd3d_shader_dst_param reg; + unsigned int alignment; unsigned int byte_count; bool zero_init; };
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 115 ++++++++++++++++++----- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 2 files changed, 92 insertions(+), 24 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 905c26323..09b0b0d9b 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -543,6 +543,7 @@ struct sm6_value { const struct sm6_type *type; enum sm6_value_type value_type; + unsigned int structure_stride; bool is_undefined; union { @@ -3033,6 +3034,7 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6 dst_param_init(&ins->declaration.tgsm_raw.reg); register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); dst->u.reg = ins->declaration.tgsm_raw.reg.reg; + dst->structure_stride = 0; ins->declaration.tgsm_raw.alignment = alignment; byte_count = elem_type->u.width / 8u; if (byte_count != 4) @@ -3046,6 +3048,33 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6 ins->flags = init; }
+static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type, + unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst) +{ + enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); + struct vkd3d_shader_instruction *ins; + unsigned int structure_stride; + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED); + dst_param_init(&ins->declaration.tgsm_structured.reg); + register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, + data_type, sm6->tgsm_count++); + dst->u.reg = ins->declaration.tgsm_structured.reg.reg; + structure_stride = elem_type->u.width / 8u; + if (structure_stride != 4) + { + FIXME("Unsupported structure stride %u.\n", structure_stride); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Structured TGSM byte stride %u is not supported.", structure_stride); + } + dst->structure_stride = structure_stride; + ins->declaration.tgsm_structured.alignment = alignment; + ins->declaration.tgsm_structured.byte_stride = structure_stride; + ins->declaration.tgsm_structured.structure_count = count; + /* The initialiser value index will be resolved later when forward references can be handled. 
*/ + ins->flags = init; +} + static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) { const struct sm6_type *type, *scalar_type; @@ -3178,15 +3207,10 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ "TGSM variables of type class %u are not supported.", scalar_type->class); return false; } - if (count != 1) - { - FIXME("Unsupported structured TGSM.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "Structured TGSM global variables are not supported."); - return false; - } - - sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); + if (count == 1) + sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); + else + sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst); } else { @@ -3332,6 +3356,11 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ins->flags = 0; } + else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) + { + ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } } for (i = base_value_idx; i < sm6->value_count; ++i) { @@ -5627,6 +5656,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record register_index_address_init(&reg->idx[1], elem_value, sm6); reg->idx[1].is_in_bounds = is_in_bounds; reg->idx_count = 2; + dst->structure_stride = src->structure_stride;
ins->handler_idx = VKD3DSIH_NOP; } @@ -5673,15 +5703,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); - vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV); + if (ptr->structure_stride) + { + assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); + + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + if (ptr->u.reg.idx[1].rel_addr) + src_params[0] = *ptr->u.reg.idx[1].rel_addr; + else + src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + /* Struct offset is always zero as there is no struct, just an array. */ + src_param_make_constant_uint(&src_params[1], 0); + src_param_init_from_value(&src_params[2], ptr); + src_params[2].reg.alignment = alignment; + } + else + { + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV);
- if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) - return; - if (operand_count > 1) - src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[operand_count - 1], ptr); - src_params[operand_count - 1].reg.alignment = alignment; + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) + return; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], ptr); + src_params[operand_count - 1].reg.alignment = alignment; + }
instruction_dst_param_init_ssa_scalar(ins, sm6); } @@ -5836,14 +5885,32 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); - vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); + if (ptr->structure_stride) + { + assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED);
- if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) - return; - if (operand_count > 1) - src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[operand_count - 1], src); + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + if (ptr->u.reg.idx[1].rel_addr) + src_params[0] = *ptr->u.reg.idx[1].rel_addr; + else + src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + /* Struct offset is always zero as there is no struct, just an array. */ + src_param_make_constant_uint(&src_params[1], 0); + src_param_init_from_value(&src_params[2], src); + } + else + { + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); + + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) + return; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], src); + }
dst_param = instruction_dst_params_alloc(ins, 1, sm6); dst_param_init(dst_param); diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index f533d167d..6800547ac 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1095,6 +1095,7 @@ struct vkd3d_shader_tgsm_raw struct vkd3d_shader_tgsm_structured { struct vkd3d_shader_dst_param reg; + unsigned int alignment; unsigned int byte_stride; unsigned int structure_count; bool zero_init;
From: Conor McCarthy cmccarthy@codeweavers.com
The ignored alignment is only traced; it would cause a lot of warning spam if it were a warning. --- libs/vkd3d-shader/spirv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 54f453346..7bcfd2f1b 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -6435,13 +6435,18 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp }
static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride, bool zero_init) + const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, + unsigned int structure_stride, bool zero_init) { uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const SpvStorageClass storage_class = SpvStorageClassWorkgroup; struct vkd3d_symbol reg_symbol;
+ /* Alignment is supported only in the Kernel execution model. */ + if (alignment) + TRACE("Ignoring alignment %u.\n", alignment); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); length_id = spirv_compiler_get_constant_uint(compiler, size); array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); @@ -6464,7 +6469,7 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; - spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, + spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment, tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); }
@@ -6473,7 +6478,7 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi { const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; unsigned int stride = tgsm_structured->byte_stride / 4; - spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, + spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment, tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 09b0b0d9b..b3e4f0534 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -393,6 +393,7 @@ enum dx_intrinsic_opcode DX_TEXTURE_GATHER_CMP = 74, DX_ATOMIC_BINOP = 78, DX_ATOMIC_CMP_XCHG = 79, + DX_BARRIER = 80, DX_DERIV_COARSEX = 83, DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, @@ -465,6 +466,14 @@ enum dxil_atomic_binop_code ATOMIC_BINOP_INVALID, };
+enum dxil_sync_flags +{ + SYNC_THREAD_GROUP = 0x1, + SYNC_GLOBAL_UAV = 0x2, + SYNC_THREAD_GROUP_UAV = 0x4, + SYNC_GROUP_SHARED_MEMORY = 0x8, +}; + struct sm6_pointer_info { const struct sm6_type *type; @@ -4104,6 +4113,27 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr dst->u.reg = dst_params[0].reg; }
+static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + enum dxil_sync_flags flags; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC); + flags = sm6_value_get_constant_uint(operands[0]); + ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV); + if (flags & SYNC_GLOBAL_UAV) + ins->flags |= VKD3DSSF_GLOBAL_UAV; + if (flags & SYNC_GROUP_SHARED_MEMORY) + ins->flags |= VKD3DSSF_GROUP_SHARED_MEMORY; + if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)) + { + FIXME("Unhandled flags %#x.\n", flags); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Barrier flags %#x are unhandled.", flags); + } +} + static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4933,6 +4963,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, + [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier}, [DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary}, [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store},
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 7bcfd2f1b..de92d9229 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -8766,7 +8766,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } - assert(dst->reg.data_type == VKD3D_DATA_UINT); spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); }
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index de92d9229..38f42d886 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -8887,7 +8887,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0);
data = &src[instruction->src_count - 1]; - assert(data->reg.data_type == VKD3D_DATA_UINT); val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask);
component_count = vsir_write_mask_component_count(dst->write_mask);
From: Conor McCarthy cmccarthy@codeweavers.com
For 64-bit indexable temps (and any other 64-bit declarations) the write mask must not be converted. --- libs/vkd3d-shader/spirv.c | 10 +++++----- libs/vkd3d-shader/vkd3d_shader_private.h | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 38f42d886..5d9a6bb11 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4094,7 +4094,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, struct vkd3d_shader_register_info reg_info; unsigned int component_count; uint32_t type_id, val_id; - uint32_t write_mask32; + uint32_t val_write_mask;
if (reg->type == VKD3DSPR_IMMCONST) return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); @@ -4114,17 +4114,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); return vkd3d_spirv_get_op_undef(builder, type_id); } - assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); spirv_compiler_emit_dereference_register(compiler, reg, &reg_info);
- write_mask32 = data_type_is_64_bit(reg->data_type) ? vsir_write_mask_32_from_64(write_mask) : write_mask; + val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) + ? vsir_write_mask_32_from_64(write_mask) : write_mask;
/* Intermediate value (no storage class). */ if (reg_info.storage_class == SpvStorageClassMax) { val_id = reg_info.id; } - else if (vsir_write_mask_component_count(write_mask32) == 1) + else if (vsir_write_mask_component_count(val_write_mask) == 1) { return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, &reg_info); } @@ -4137,7 +4137,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; val_id = spirv_compiler_emit_swizzle(compiler, - val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); + val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask);
if (component_type != reg_info.component_type) { diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 6800547ac..05f76c5d1 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1604,6 +1604,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc } }
+static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) +{ + return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index);
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index b3e4f0534..4794a399f 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -2949,6 +2949,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
default: FIXME("Unhandled constant code %u.\n", record->code); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Constant code %u is unhandled.", record->code); dst->u.reg.type = VKD3DSPR_UNDEF; break; }
From: Conor McCarthy cmccarthy@codeweavers.com
--- Makefile.am | 1 + tests/hlsl/tgsm.shader_test | 135 ++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 tests/hlsl/tgsm.shader_test
diff --git a/Makefile.am b/Makefile.am index d07218d88..dbc0adb60 100644 --- a/Makefile.am +++ b/Makefile.am @@ -199,6 +199,7 @@ vkd3d_shader_tests = \ tests/hlsl/texture-load-typed.shader_test \ tests/hlsl/texture-load.shader_test \ tests/hlsl/texture-ordering.shader_test \ + tests/hlsl/tgsm.shader_test \ tests/hlsl/transpose.shader_test \ tests/hlsl/trigonometry.shader_test \ tests/hlsl/trunc.shader_test \ diff --git a/tests/hlsl/tgsm.shader_test b/tests/hlsl/tgsm.shader_test new file mode 100644 index 000000000..a1b7259e9 --- /dev/null +++ b/tests/hlsl/tgsm.shader_test @@ -0,0 +1,135 @@ +[require] +shader model >= 5.0 + +[uav 1] +format r32 uint +size (buffer, 4) + +1 0 0 0 + +[compute shader todo] +RWByteAddressBuffer u : register(u1); +groupshared uint m; + + [numthreads(32, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + if (!local_idx) + m = group_id.x; + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m, group_id.x); + GroupMemoryBarrierWithGroupSync(); + if (!local_idx) + u.Store(4 * group_id.x, m); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) rui (0) +probe uav 1 (1) rui (33) +probe uav 1 (2) rui (66) +probe uav 1 (3) rui (99) + + +[uav 1] +format r32 sint +size (buffer, 4) + +1 0 0 0 + +[compute shader todo] +RWByteAddressBuffer u : register(u1); +groupshared int m; + + [numthreads(32, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + if (!local_idx) + m = group_id.x; + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m, -group_id.x); + GroupMemoryBarrierWithGroupSync(); + if (!local_idx) + u.Store(4 * group_id.x, m); +} + +[test] +todo dispatch 4 1 1 +probe uav 1 (0) ri (0) +probe uav 1 (1) ri (-31) +probe uav 1 (2) ri (-62) +probe uav 1 (3) ri (-93) + + +[uav 1] +format r32 float +size (buffer, 8) + +1 1 1 1 0 0 0 0 + +[uav 2] +format r32 sint +size (buffer, 8) + +1 1 1 1 0 0 0 0 + +[compute shader todo] +uniform uint idx; + +#define GROUP_SIZE 4 + +struct data +{ + float 
f; + uint u; +}; + +RWBuffer<float> u : register(u1); +RWBuffer<uint> u2 : register(u2); +groupshared data m[GROUP_SIZE]; + + [numthreads(GROUP_SIZE, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID, + uint thread_id : SV_DispatchThreadID) +{ + uint i; + if (!local_idx) + { + for (i = 0; i < GROUP_SIZE; ++i) + { + m[i].f = group_id.x; + m[i].u = group_id.x; + } + } + GroupMemoryBarrierWithGroupSync(); + InterlockedAdd(m[0].u, 2); + InterlockedAdd(m[idx].u, 1); + GroupMemoryBarrierWithGroupSync(); + for (i = 0; i < local_idx; ++i) + { + m[local_idx].f += group_id.x; + m[local_idx].u += group_id.x; + } + u[thread_id.x] = m[local_idx].f; + u2[thread_id.x] = m[local_idx].u; +} + +[test] +uniform 0 uint 1 +todo dispatch 2 1 1 +probe uav 1 (0) r (0.0) +probe uav 1 (1) r (0.0) +probe uav 1 (2) r (0.0) +probe uav 1 (3) r (0.0) +probe uav 1 (4) r (1.0) +probe uav 1 (5) r (2.0) +probe uav 1 (6) r (3.0) +probe uav 1 (7) r (4.0) +probe uav 2 (0) ri (8) +probe uav 2 (1) ri (4) +probe uav 2 (2) ri (0) +probe uav 2 (3) ri (0) +probe uav 2 (4) ri (9) +probe uav 2 (5) ri (6) +probe uav 2 (6) ri (3) +probe uav 2 (7) ri (4)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 69 +++++++++++++++++++++++++++++++ tests/hlsl/numthreads.shader_test | 10 +++-- 2 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 4794a399f..3d540d269 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -398,6 +398,10 @@ enum dx_intrinsic_opcode DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, DX_DERIV_FINEY = 86, + DX_THREAD_ID = 93, + DX_GROUP_ID = 94, + DX_THREAD_ID_IN_GROUP = 95, + DX_FLATTENED_THREAD_ID_IN_GROUP = 96, DX_SPLIT_DOUBLE = 102, DX_LEGACY_F32TOF16 = 130, DX_LEGACY_F16TOF32 = 131, @@ -751,6 +755,7 @@ struct sm6_parser
struct vkd3d_shader_dst_param *output_params; struct vkd3d_shader_dst_param *input_params; + uint64_t input_reg_declarations;
struct sm6_function *functions; size_t function_count; @@ -4227,6 +4232,66 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int ins->handler_idx = VKD3DSIH_NOP; }
+static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, + enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int component_count) +{ + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + uint64_t reg_flag; + + reg_flag = 1ull << reg_type; + if (!(sm6->input_reg_declarations & reg_flag)) + { + sm6->input_reg_declarations |= reg_flag; + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_INPUT); + dst_param = &ins->declaration.dst; + vsir_register_init(&dst_param->reg, reg_type, data_type, 0); + dst_param_init_vector(dst_param, component_count); + } +} + +static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + unsigned int component_count = 3, component_idx = 0; + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum vkd3d_shader_register_type reg_type; + + switch (op) + { + case DX_THREAD_ID: + reg_type = VKD3DSPR_THREADID; + break; + case DX_GROUP_ID: + reg_type = VKD3DSPR_THREADGROUPID; + break; + case DX_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADID; + break; + case DX_FLATTENED_THREAD_ID_IN_GROUP: + reg_type = VKD3DSPR_LOCALTHREADINDEX; + component_count = 1; + break; + default: + vkd3d_unreachable(); + } + + sm6_parser_dcl_register_builtin(sm6, reg_type, VKD3D_DATA_UINT, component_count); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + vsir_register_init(&src_param->reg, reg_type, VKD3D_DATA_UINT, 0); + if (component_count > 1) + { + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + component_idx = sm6_value_get_constant_uint(operands[0]); + } + src_param_init_scalar(src_param, component_idx); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, 
enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4981,10 +5046,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_FLATTENED_THREAD_ID_IN_GROUP ] = {"i", "", sm6_parser_emit_dx_compute_builtin}, [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FRC ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_GET_DIMENSIONS ] = {"D", "Hi", sm6_parser_emit_dx_get_dimensions}, + [DX_GROUP_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_IBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -5020,6 +5087,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, + [DX_THREAD_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, + [DX_THREAD_ID_IN_GROUP ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, diff --git a/tests/hlsl/numthreads.shader_test b/tests/hlsl/numthreads.shader_test index e8f7900bf..ceeb40e3e 100644 --- a/tests/hlsl/numthreads.shader_test +++ b/tests/hlsl/numthreads.shader_test @@ -172,7 +172,7 @@ size (2d, 2, 2) 1.0 1.0
[compute shader] -/* Attributes are taken from the first function, and dropped from the second. */ +/* In SM < 6.0, attributes are taken from the first function, and dropped from the second. */ RWTexture2D<float> u;
[numthreads(2, 1, 1)] @@ -185,8 +185,10 @@ void main(uint2 id : sv_dispatchthreadid) }
[test] -todo(sm>=6) dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) r (2.0) -probe uav 0 (0, 1) r (1.0) -probe uav 0 (1, 0) r (2.0) +if(sm<6) probe uav 0 (0, 1) r (1.0) +if(sm<6) probe uav 0 (1, 0) r (2.0) probe uav 0 (1, 1) r (1.0) +if(sm>=6) probe uav 0 (0, 1) r (2.0) +if(sm>=6) probe uav 0 (1, 0) r (1.0)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/d3d_asm.c | 30 ++++ libs/vkd3d-shader/dxil.c | 184 +++++++++++++++++++++++ libs/vkd3d-shader/vkd3d_shader_private.h | 6 + tests/hlsl/tgsm.shader_test | 4 +- 4 files changed, 222 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d-shader/d3d_asm.c index 7b30d2600..50d545673 100644 --- a/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d-shader/d3d_asm.c @@ -448,6 +448,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); }
+static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) +{ + if (atomic_flags & VKD3DARF_SEQ_CST) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_seq_cst"); + atomic_flags &= ~VKD3DARF_SEQ_CST; + } + if (atomic_flags & VKD3DARF_VOLATILE) + { + vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); + atomic_flags &= ~VKD3DARF_VOLATILE; + } + + if (atomic_flags) + vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags); +} + static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) { if (sync_flags & VKD3DSSF_GLOBAL_UAV) @@ -1712,6 +1729,19 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile } break;
+ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: + case VKD3DSIH_IMM_ATOMIC_IADD: + case VKD3DSIH_IMM_ATOMIC_AND: + case VKD3DSIH_IMM_ATOMIC_IMAX: + case VKD3DSIH_IMM_ATOMIC_IMIN: + case VKD3DSIH_IMM_ATOMIC_OR: + case VKD3DSIH_IMM_ATOMIC_UMAX: + case VKD3DSIH_IMM_ATOMIC_UMIN: + case VKD3DSIH_IMM_ATOMIC_EXCH: + case VKD3DSIH_IMM_ATOMIC_XOR: + shader_dump_atomic_op_flags(compiler, ins->flags); + break; + case VKD3DSIH_SYNC: shader_dump_sync_flags(compiler, ins->flags); break; diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 3d540d269..96bd3c111 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -456,6 +456,32 @@ enum dxil_predicate ICMP_SLE = 41, };
+enum dxil_rmw_code +{ + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10, +}; + +enum dxil_atomic_ordering +{ + ORDERING_NOTATOMIC = 0, + ORDERING_UNORDERED = 1, + ORDERING_MONOTONIC = 2, + ORDERING_ACQUIRE = 3, + ORDERING_RELEASE = 4, + ORDERING_ACQREL = 5, + ORDERING_SEQCST = 6, +}; + enum dxil_atomic_binop_code { ATOMIC_BINOP_ADD, @@ -2582,6 +2608,30 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 return true; }
+static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) + { + WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer."); + return false; + } + return true; +} + +static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_parser *sm6) +{ + if (!sm6_type_is_i32(value->type)) + { + WARN("Operand result type %u is not i32.\n", value->type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An int32 operand passed to a DXIL instruction is not an int32."); + return false; + } + return true; +} + static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) { if (idx < sm6->value_count) @@ -3496,6 +3546,9 @@ struct function_emission_state unsigned int temp_idx; };
+static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs,
+        unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg);
+
 static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record,
         struct vkd3d_shader_instruction *ins, struct sm6_value *dst)
 {
@@ -3571,6 +3624,138 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec
     sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst);
 }
 
+/* Translate a DXIL atomicrmw operation code to the matching VKD3DSIH_IMM_ATOMIC_* opcode.
+ * Codes without a mapping yield VKD3DSIH_INVALID. */
+static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code)
+{
+    switch (code)
+    {
+        case RMW_ADD:
+            return VKD3DSIH_IMM_ATOMIC_IADD;
+        case RMW_AND:
+            return VKD3DSIH_IMM_ATOMIC_AND;
+        case RMW_MAX:
+            return VKD3DSIH_IMM_ATOMIC_IMAX;
+        case RMW_MIN:
+            return VKD3DSIH_IMM_ATOMIC_IMIN;
+        case RMW_OR:
+            return VKD3DSIH_IMM_ATOMIC_OR;
+        case RMW_UMAX:
+            return VKD3DSIH_IMM_ATOMIC_UMAX;
+        case RMW_UMIN:
+            return VKD3DSIH_IMM_ATOMIC_UMIN;
+        case RMW_XCHG:
+            return VKD3DSIH_IMM_ATOMIC_EXCH;
+        case RMW_XOR:
+            return VKD3DSIH_IMM_ATOMIC_XOR;
+        default:
+            /* DXIL currently doesn't use SUB and NAND. */
+            return VKD3DSIH_INVALID;
+    }
+}
+
+/* Emit a VKD3DSIH_IMM_ATOMIC_* instruction for a DXIL atomicrmw record. The pointer operand must be
+ * a pointer to i32 in groupshared memory. Returns early if an operand fails validation or an allocation fails. */
+static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record,
+        struct function_emission_state *state, struct sm6_value *dst)
+{
+    struct vkd3d_shader_register coord, const_offset, const_zero;
+    const struct vkd3d_shader_register *regs[2];
+    struct vkd3d_shader_dst_param *dst_params;
+    struct vkd3d_shader_src_param *src_params;
+    struct vkd3d_shader_instruction *ins;
+    const struct sm6_value *ptr, *src;
+    enum vkd3d_shader_opcode op;
+    unsigned int i = 0;
+    bool is_volatile;
+    uint64_t code;
+
+    if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))
+            || !sm6_value_validate_is_pointer_to_i32(ptr, sm6))
+        return;
+
+    if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM)
+    {
+        WARN("Register is not groupshared.\n");
+        vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+                "The destination register for an atomicrmw instruction is not groupshared memory.");
+        return;
+    }
+
+    dst->type = ptr->type->u.pointer.type;
+
+    if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i))
+            || !sm6_value_validate_is_i32(src, sm6))
+        return;
+
+    /* Exactly four operands must remain: operation, volatile flag, ordering and synchronisation scope. */
+    if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6))
+        return;
+
+    if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID)
+    {
+        FIXME("Unhandled atomicrmw op %"PRIu64".\n", code);
+        vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+                "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code);
+        return;
+    }
+
+    is_volatile = record->operands[i++];
+
+    /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */
+    if ((code = record->operands[i++]) != ORDERING_SEQCST)
+        FIXME("Unhandled atomic ordering %"PRIu64".\n", code);
+
+    if ((code = record->operands[i]) != 1)
+        WARN("Ignoring synchronisation scope %"PRIu64".\n", code);
+
+    /* Structured access: build a two-component coordinate vector from the register's second index and zero. */
+    if (ptr->structure_stride)
+    {
+        if (ptr->u.reg.idx[1].rel_addr)
+        {
+            regs[0] = &ptr->u.reg.idx[1].rel_addr->reg;
+        }
+        else
+        {
+            register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset);
+            regs[0] = &const_offset;
+        }
+        register_make_constant_uint(&const_zero, 0);
+        regs[1] = &const_zero;
+        if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord))
+            return;
+    }
+
+    ins = state->ins;
+    vsir_instruction_init(ins, &sm6->p.location, op);
+    ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST;
+
+    if (!(src_params = instruction_src_params_alloc(ins, 2, sm6)))
+        return;
+    if (ptr->structure_stride)
+        src_param_init_vector_from_reg(&src_params[0], &coord);
+    else
+        src_param_make_constant_uint(&src_params[0], 0);
+    src_param_init_from_value(&src_params[1], src);
+
+    /* Check for allocation failure before dereferencing, as is done for the source parameters above. */
+    if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6)))
+        return;
+    register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6);
+    dst_param_init(&dst_params[0]);
+
+    dst_params[1].reg = ptr->u.reg;
+    /* The groupshared register has data type UAV when accessed. */
+    dst_params[1].reg.data_type = VKD3D_DATA_UAV;
+    dst_params[1].reg.idx[1].rel_addr = NULL;
+    dst_params[1].reg.idx[1].offset = ~0u;
+    dst_params[1].reg.idx_count = 1;
+    dst_param_init(&dst_params[1]);
+
+    dst->u.reg = dst_params[0].reg;
+}
+
 static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a,
         const struct sm6_type *type_b, struct sm6_parser *sm6)
 {
@@ -6561,6 +6746,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const
             case FUNC_CODE_INST_ALLOCA:
                 sm6_parser_emit_alloca(sm6, record, ins, dst);
                 break;
+            case FUNC_CODE_INST_ATOMICRMW:
+            {
+                struct function_emission_state state = {code_block, ins};
+                sm6_parser_emit_atomicrmw(sm6, record, &state, dst);
+                sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx);
+                break;
+            }
             case FUNC_CODE_INST_BINOP:
                 sm6_parser_emit_binop(sm6, record, ins, dst);
                 break;
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h
index 05f76c5d1..8bbd224a9 100644
--- a/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -754,6 +754,12 @@ enum vkd3d_shader_uav_flags
     VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100,
 };
+enum vkd3d_shader_atomic_rmw_flags +{ + VKD3DARF_SEQ_CST = 0x1, + VKD3DARF_VOLATILE = 0x2, +}; + enum vkd3d_tessellator_domain { VKD3D_TESSELLATOR_DOMAIN_LINE = 1, diff --git a/tests/hlsl/tgsm.shader_test b/tests/hlsl/tgsm.shader_test index a1b7259e9..e6b1a6858 100644 --- a/tests/hlsl/tgsm.shader_test +++ b/tests/hlsl/tgsm.shader_test @@ -24,7 +24,7 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) }
[test] -todo dispatch 4 1 1 +todo(sm<6) dispatch 4 1 1 probe uav 1 (0) rui (0) probe uav 1 (1) rui (33) probe uav 1 (2) rui (66) @@ -54,7 +54,7 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) }
[test] -todo dispatch 4 1 1 +todo(sm<6) dispatch 4 1 1 probe uav 1 (0) ri (0) probe uav 1 (1) ri (-31) probe uav 1 (2) ri (-62)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 5d9a6bb11..9b56dddfa 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -9229,6 +9229,11 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil
val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type);
+ if (instruction->flags & VKD3DARF_SEQ_CST) + WARN("Ignoring sequentially consistent ordering.\n"); + if (instruction->flags & VKD3DARF_VOLATILE) + WARN("Ignoring 'volatile' attribute.\n"); + operands[i++] = pointer_id; operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone);
From: Conor McCarthy cmccarthy@codeweavers.com
--- tests/d3d12.c | 87 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 12 deletions(-)
diff --git a/tests/d3d12.c b/tests/d3d12.c index e3771b7fc..b98120581 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -22165,7 +22165,7 @@ static void test_tgsm(void) unsigned int i; HRESULT hr;
- static const DWORD raw_tgsm_code[] = + static const BYTE raw_tgsm_code[] = { #if 0 RWByteAddressBuffer u; @@ -22183,17 +22183,80 @@ static void test_tgsm(void) u.Store(4 * group_id.x, m); } #endif - 0x43425844, 0x467df6d9, 0x5f56edda, 0x5c96b787, 0x60c91fb8, 0x00000001, 0x00000148, 0x00000003, - 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, - 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000f4, 0x00050050, 0x0000003d, 0x0100086a, - 0x0300009d, 0x0011e000, 0x00000000, 0x0200005f, 0x00024000, 0x0200005f, 0x00021012, 0x02000068, - 0x00000001, 0x0400009f, 0x0011f000, 0x00000000, 0x00000004, 0x0400009b, 0x00000020, 0x00000001, - 0x00000001, 0x0200001f, 0x0002400a, 0x060000a6, 0x0011f012, 0x00000000, 0x00004001, 0x00000000, - 0x0002100a, 0x01000015, 0x010018be, 0x060000ad, 0x0011f000, 0x00000000, 0x00004001, 0x00000000, - 0x0002100a, 0x010018be, 0x0200001f, 0x0002400a, 0x06000029, 0x00100012, 0x00000000, 0x0002100a, - 0x00004001, 0x00000002, 0x070000a5, 0x00100022, 0x00000000, 0x00004001, 0x00000000, 0x0011f006, - 0x00000000, 0x070000a6, 0x0011e012, 0x00000000, 0x0010000a, 0x00000000, 0x0010001a, 0x00000000, - 0x01000015, 0x0100003e, + 0x44, 0x58, 0x42, 0x43, 0x6b, 0xd4, 0x7b, 0x20, 0x43, 0x67, 0x57, 0x77, 0x1f, 0x8e, 0xab, 0x24, 0x7d, 0x1d, 0xc3, 0x96, 0x01, 0x00, 0x00, 0x00, + 0xec, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, + 0xd0, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x60, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x4f, 0xc4, 0x63, 0x01, 0xf7, 0xab, 0x62, 0xc7, 0x39, 0x86, 0x7b, 0x33, 0x71, 0x2b, 0x93, 0xe2, 0x44, 0x58, 0x49, 0x4c, + 0xf8, 0x05, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x7e, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0xe0, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x75, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, + 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, + 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, + 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, + 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0xc0, 0x00, 0xd2, 0x06, 0x63, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x80, + 0x6a, 0x03, 0x41, 0xfc, 0xff, 0xff, 0xff, 0xff, 0x00, 0x48, 0x00, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, + 0x20, 0x4c, 0x08, 0x86, 0x09, 0x01, 0x01, 0x00, 0x89, 0x20, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, + 0x93, 0x22, 
0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x60, 0x73, 0x04, + 0x60, 0x30, 0x88, 0x00, 0x0c, 0x23, 0x00, 0x25, 0x10, 0x34, 0x8a, 0x00, 0x00, 0x95, 0x32, 0x00, 0x00, 0xe8, 0x94, 0x41, 0x00, 0x40, 0x69, 0x8e, + 0x00, 0xa1, 0x75, 0xcf, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0x1f, 0x02, 0xcd, 0xb0, 0x10, 0x28, 0x60, 0x73, 0x04, 0x41, 0x31, 0x1a, 0x58, 0x00, + 0x38, 0x7a, 0x45, 0x11, 0xa0, 0x01, 0x00, 0x00, 0x00, 0x8b, 0xe2, 0x40, 0xc0, 0x69, 0xd2, 0x14, 0x51, 0xc2, 0xe4, 0xaf, 0xf0, 0x86, 0x4d, 0x84, + 0x36, 0x0c, 0x11, 0x21, 0x49, 0x1b, 0x55, 0x14, 0x44, 0x84, 0x02, 0x40, 0x74, 0x8e, 0x00, 0x14, 0xa6, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, + 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, + 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, + 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, + 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, + 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, + 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x3a, 0x0f, 0x04, 0x90, 0x21, 0x23, 0x45, 0x44, 0x00, 0x3a, + 0x00, 0x78, 0xc8, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xa7, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x21, 0xcf, 0x01, 0x04, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x04, 0x08, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x0f, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x0c, 0x79, 0x22, 0x20, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x02, 0x01, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, + 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x02, 0x25, 0x30, 0x02, 0x50, 0x0e, 0xc5, 0x50, 0x16, 0x85, 0x50, 0x40, 0xe4, 0x46, 0x00, 0xc8, 0x8e, + 0x00, 0x14, 0x08, 0xd5, 0x19, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0xc4, + 0x31, 0x20, 0xc3, 0x1b, 0x43, 0x81, 0x93, 0x4b, 0xb3, 0x0b, 0xa3, 0x2b, 0x4b, 0x01, 0x89, 0x71, 0xb9, 0x71, 0x81, 0x71, 0xa1, 0x81, 0x81, 0xb1, + 0x01, 0x41, 0xb1, 0xc9, 0x29, 0xab, 0xa1, 0x29, 0x93, 0xc9, 0x81, 0x49, 0xd9, 0x10, 0x04, 0x13, 0x04, 0xe0, 0x98, 0x20, 0x00, 0xc8, 0x06, 0x61, + 0x20, 0x26, 0x08, 0x40, 0xb2, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x13, 0x04, 0x40, 0xd9, 0x30, 0x20, 0x09, 0x31, 0x41, 0xa8, 0x22, 0x02, 0x13, + 0x04, 0x60, 0x99, 0x20, 0x38, 0xce, 0x86, 0x85, 0x58, 0x18, 0x82, 0x18, 0x1a, 0xc7, 0x71, 0x80, 0x0d, 0xc1, 0xb3, 0x81, 0x00, 0x20, 0x00, 0x98, + 0x20, 0x10, 0x01, 0x89, 0xb6, 0xb0, 0x34, 0xb7, 0x09, 0x82, 0x05, 0x4d, 0x10, 0x00, 0x66, 0x82, 0x00, 0x34, 0x1b, 0x06, 0x6b, 0x18, 0x36, 0x10, + 0x04, 0x55, 0x5d, 0x1b, 0x0a, 0x69, 0x02, 0x22, 0x8c, 0x43, 0x9a, 0x1b, 0x1d, 0x9f, 0xb7, 0x36, 0xb7, 0x34, 0xb8, 0x37, 0xba, 0x32, 0x37, 0x3a, + 0x90, 0x31, 0xb4, 0x30, 0x39, 0x46, 0x53, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x20, 0x43, 0x2f, 0x43, 0x2b, 0x2b, 0x20, 0x54, 0x42, 0x41, 0x41, 0x1b, + 0x02, 0x6e, 0x82, 0x60, 0x3d, 0x1b, 0x86, 0xad, 0xf3, 0x36, 0x0c, 0xda, 0xe7, 0x6d, 0x18, 0xc0, 0x00, 0x0c, 0xbc, 0x2a, 0x6c, 0x6c, 0x76, 0x6d, + 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d, 0x99, 0xdc, 0x5c, 0xda, 0x9b, 0xdb, 0x94, 0x80, 0x68, 0x42, + 0x86, 0xe7, 0x62, 0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9, 0x35, 0xbd, 0x91, + 0x95, 0xb1, 0x4d, 0x09, 0x92, 0x32, 0x64, 0x78, 0x2e, 0x72, 0x65, 
0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53, 0x02, 0xa8, 0x0e, 0x19, 0x9e, + 0x4b, 0x99, 0x1b, 0x9d, 0x5c, 0x1e, 0xd4, 0x5b, 0x9a, 0x1b, 0xdd, 0xdc, 0x94, 0x00, 0x03, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, + 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, + 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, + 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, + 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, + 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, + 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, + 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, + 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, + 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, + 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, + 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, + 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, + 0xf6, 0x10, 0x0e, 
0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, + 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x05, 0x80, 0x04, 0x7e, + 0x6d, 0x20, 0x70, 0x96, 0x0c, 0x1a, 0x41, 0x33, 0x5c, 0xbe, 0xf3, 0xf8, 0x01, 0x10, 0x45, 0x08, 0x11, 0x99, 0x41, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, + 0x01, 0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8, 0xc8, 0x6d, 0x5b, 0x81, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x42, 0x44, 0x00, 0x13, 0x11, 0x02, + 0xcd, 0xb0, 0x10, 0x26, 0x20, 0x06, 0xc3, 0xe5, 0x3b, 0x8f, 0x5f, 0x2c, 0xc0, 0x34, 0x11, 0x0d, 0x31, 0xb4, 0x47, 0x44, 0x00, 0x83, 0x38, 0x88, + 0x0d, 0x18, 0x39, 0xd4, 0xe3, 0x23, 0xb7, 0x6d, 0x03, 0xd1, 0x70, 0xf9, 0xce, 0xe3, 0x1b, 0x91, 0x43, 0x3d, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x01, + 0x04, 0x03, 0x20, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x13, 0x04, 0x45, 0x2c, 0x10, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x04, 0x66, 0x00, 0x0a, 0x34, 0xa0, 0x24, 0x4a, 0xae, 0x80, 0x03, 0x8a, 0x37, 0xa0, 0x08, 0x4a, 0x31, 0x80, 0x56, 0x09, + 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xf0, 0x58, 0x46, 0x30, 0x4d, 0xcd, 0x88, 0x41, 0x01, 0x80, 0x20, 0x18, 0x14, 0x9a, + 0x31, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x87, 0x66, 0x54, 0xc3, 0x0d, 0x82, 0x05, 0x06, 0xb3, 0x0c, 0x81, 0x10, 0x0c, 0x1b, 0x10, 0x9f, 0x30, + 0x00, 0xb3, 0x04, 0xc2, 0x40, 0x85, 0x23, 0x06, 0x80, 0x10, 0x8c, 0x18, 0x18, 0x00, 0x08, 0x82, 0x41, 0xc2, 0x35, 0xcc, 0x98, 0x81, 0x01, 0x06, + 0x43, 0x00, 0x18, 0xc1, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x24, 0x9d, 0xd3, 0xcc, 0x32, 0x0c, 0x84, 0x40, 0x61, 0x00, 0x46, 0x15, 0xca, 0x8d, + 0x18, 0x34, 0x00, 0x08, 0x82, 0x41, 0xd4, 0x29, 0x48, 0x10, 0x09, 0x51, 0x14, 0x25, 0xb3, 0x04, 0x04, 0x06, 0xc4, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x5b, 0x86, 0x22, 0x00, 0x83, 0x2d, 0x03, 0x13, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
}; static const D3D12_SHADER_BYTECODE cs_raw_tgsm = {raw_tgsm_code, sizeof(raw_tgsm_code)}; static const DWORD structured_tgsm_code[] =