-- v3: vkd3d-shader/dxil: Implement DX instruction LoadInput. vkd3d-shader/dxil: Declare shader inputs. vkd3d-shader/dxbc: Load input signatures also from ISG1 chunks. vkd3d-shader/spirv: Build undefined values once. vkd3d-shader/spirv: Introduce a Static Single Assignment register type.
From: Conor McCarthy cmccarthy@codeweavers.com
The allocator is used for DXIL input/output parameter arrays. --- libs/vkd3d-shader/vkd3d_shader_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d-shader/vkd3d_shader_main.c index 273a543a..2e2569d2 100644 --- a/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d-shader/vkd3d_shader_main.c @@ -1748,7 +1748,7 @@ static struct vkd3d_shader_param_node *shader_param_allocator_node_create( static void shader_param_allocator_init(struct vkd3d_shader_param_allocator *allocator, unsigned int count, unsigned int stride) { - allocator->count = max(count, 4); + allocator->count = max(count, MAX_REG_OUTPUT); allocator->stride = stride; allocator->head = NULL; allocator->current = NULL;
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/d3d_asm.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d-shader/d3d_asm.c index d72402eb..5eddb6fe 100644 --- a/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d-shader/d3d_asm.c @@ -1066,6 +1066,10 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_addline(buffer, "oStencilRef"); break;
+ case VKD3DSPR_UNDEF: + shader_addline(buffer, "undef"); + break; + default: shader_addline(buffer, "<unhandled_rtype(%#x)>", reg->type); break;
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/d3d_asm.c | 6 +- libs/vkd3d-shader/spirv.c | 116 ++++++++++++++++++++++- libs/vkd3d-shader/vkd3d_shader_private.h | 4 + 3 files changed, 124 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d-shader/d3d_asm.c index 5eddb6fe..dea35941 100644 --- a/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d-shader/d3d_asm.c @@ -1070,6 +1070,10 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_addline(buffer, "undef"); break;
+ case VKD3DSPR_SSA: + shader_addline(buffer, "sr"); + break; + default: shader_addline(buffer, "<unhandled_rtype(%#x)>", reg->type); break; @@ -1185,7 +1189,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const { shader_print_subscript_range(compiler, reg->idx[1].offset, reg->idx[2].offset); } - else + else if (reg->type != VKD3DSPR_SSA) { /* For descriptors in sm < 5.1 we move the reg->idx values up one slot * to normalise with 5.1. diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index f93960d6..47c8097f 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -1719,6 +1719,23 @@ static uint32_t vkd3d_spirv_build_op_glsl_std450_nclamp(struct vkd3d_spirv_build GLSLstd450NClamp, operands, ARRAY_SIZE(operands)); }
+static unsigned int shader_component_type_size(enum vkd3d_shader_component_type component_type) +{ + switch (component_type) + { + case VKD3D_SHADER_COMPONENT_FLOAT: + case VKD3D_SHADER_COMPONENT_INT: + case VKD3D_SHADER_COMPONENT_UINT: + case VKD3D_SHADER_COMPONENT_BOOL: + return 32; + case VKD3D_SHADER_COMPONENT_DOUBLE: + return 64; + default: + FIXME("Unhandled component type %#x.\n", component_type); + return 32; + } +} + static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, enum vkd3d_shader_component_type component_type, unsigned int component_count) { @@ -2323,6 +2340,9 @@ struct spirv_compiler bool write_tess_geom_point_size;
struct vkd3d_string_buffer_cache string_buffers; + + uint32_t *ssa_register_ids; + unsigned int ssa_register_count; };
static bool is_in_default_phase(const struct spirv_compiler *compiler) @@ -2370,6 +2390,8 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) shader_signature_cleanup(&compiler->output_signature); shader_signature_cleanup(&compiler->patch_constant_signature);
+ vkd3d_free(compiler->ssa_register_ids); + vkd3d_free(compiler); }
@@ -3682,6 +3704,72 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, return val_id; }
+static inline uint32_t spirv_compiler_get_ssa_register_id(const struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg) +{ + assert(reg->idx[0].offset < compiler->ssa_register_count); + assert(reg->idx_count == 1); + return compiler->ssa_register_ids[reg->idx[0].offset]; +} + +static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, enum vkd3d_shader_component_type component_type, + unsigned int swizzle, unsigned int write_mask) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int i, component_count, dst_idx, component_idx; + enum vkd3d_shader_component_type reg_component_type; + uint32_t components[VKD3D_VEC4_SIZE]; + uint32_t type_id, val_id; + bool reg_is_vec; + + /* SSA registers do not use 32-bit write masks for 64-bit values, so no special handling is needed. */ + component_count = vkd3d_write_mask_component_count(write_mask); + reg_is_vec = reg->immconst_type == VKD3D_IMMCONST_VEC4; + + val_id = spirv_compiler_get_ssa_register_id(compiler, reg); + assert(val_id); + reg_component_type = vkd3d_component_type_from_data_type(reg->u.dcl_data_type); + + if (reg_is_vec) + { + assert(shader_component_type_size(component_type) == shader_component_type_size(reg_component_type)); + type_id = vkd3d_spirv_get_type_id(builder, reg_component_type, component_count); + + if (component_count > 1) + { + /* We have no component count for the reg because it may be forward referenced. The following + * would emit a nop shuffle if the counts are equal, but DXIL doesn't emit vector extractions. 
*/ + for (i = 0, dst_idx = 0; i < VKD3D_VEC4_SIZE; ++i) + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + components[dst_idx++] = vkd3d_swizzle_get_component(swizzle, i); + val_id = vkd3d_spirv_build_op_vector_shuffle(builder, type_id, val_id, val_id, components, component_count); + } + else + { + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + component_idx = vkd3d_swizzle_get_component(swizzle, component_idx); + val_id = vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx); + } + } + + if (component_type != reg_component_type) + { + if (!reg_is_vec) + { + /* Note: write mask component count can be > 1 for a coordinate id. + * Set component_count for a scalar bitcast. */ + component_count = shader_component_type_size(reg_component_type) / shader_component_type_size(component_type); + assert(component_count); + } + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + } + + return val_id; +} + static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask) { @@ -3701,6 +3789,10 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
component_count = vkd3d_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type); + + if (reg->type == VKD3DSPR_SSA) + return spirv_compiler_emit_load_ssa_reg(compiler, reg, component_type, swizzle, write_mask); + if (!spirv_compiler_get_register_info(compiler, reg, &reg_info)) { type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); @@ -3912,6 +4004,13 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler,
assert(!register_is_constant_or_undef(reg));
+ if (reg->type == VKD3DSPR_SSA) + { + assert(reg->idx[0].offset < compiler->ssa_register_count); + compiler->ssa_register_ids[reg->idx[0].offset] = val_id; + return; + } + if (!spirv_compiler_get_register_info(compiler, reg, &reg_info)) return; spirv_compiler_emit_dereference_register(compiler, reg, &reg_info); @@ -5375,6 +5474,18 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t vkd3d_spirv_end_function_stream_insertion(builder); }
+static void spirv_compiler_emit_ssas(struct spirv_compiler *compiler, unsigned int count) +{ + assert(!compiler->ssa_register_ids); + if (!(compiler->ssa_register_ids = vkd3d_calloc(count, sizeof(*compiler->ssa_register_ids)))) + { + ERR("Failed to allocate SSA register value id array, count %u.\n", count); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_OUT_OF_MEMORY, + "Failed to allocate SSA register value id array of count %u.", count); + } + compiler->ssa_register_count = count; +} + static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -6692,7 +6803,8 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, uint32_t components[VKD3D_VEC4_SIZE]; unsigned int i, component_count;
- if (register_is_constant_or_undef(&src->reg) || dst->modifiers || src->modifiers) + if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA + || dst->modifiers || src->modifiers) goto general_implementation;
spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); @@ -9541,6 +9653,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler,
if (parser->shader_desc.temp_count) spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); + if (parser->shader_desc.ssa_count) + spirv_compiler_emit_ssas(compiler, parser->shader_desc.ssa_count);
spirv_compiler_emit_descriptor_declarations(compiler);
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index eab1c730..22645e2e 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -92,6 +92,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_SPV_INVALID_DESCRIPTOR_BINDING = 2002, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED = 2003, VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED = 2004, + VKD3D_SHADER_ERROR_SPV_OUT_OF_MEMORY = 2005,
VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300,
@@ -523,6 +524,7 @@ enum vkd3d_shader_register_type VKD3DSPR_RASTERIZER, VKD3DSPR_OUTSTENCILREF, VKD3DSPR_UNDEF, + VKD3DSPR_SSA,
VKD3DSPR_COUNT,
@@ -738,6 +740,7 @@ struct vkd3d_shader_register uint64_t immconst_uint64[VKD3D_DVEC2_SIZE]; double immconst_double[VKD3D_DVEC2_SIZE]; unsigned fp_body_idx; + enum vkd3d_data_type dcl_data_type; } u; };
@@ -872,6 +875,7 @@ struct vkd3d_shader_desc struct shader_signature patch_constant_signature;
uint32_t temp_count; + unsigned int ssa_count;
struct {
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/spirv.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 47c8097f..0985abf6 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -1215,10 +1215,14 @@ static uint32_t vkd3d_spirv_build_op_function_call(struct vkd3d_spirv_builder *b SpvOpFunctionCall, result_type, function_id, arguments, argument_count); }
-static uint32_t vkd3d_spirv_build_op_undef(struct vkd3d_spirv_builder *builder, - struct vkd3d_spirv_stream *stream, uint32_t type_id) +static uint32_t vkd3d_spirv_build_op_undef(struct vkd3d_spirv_builder *builder, uint32_t type_id) { - return vkd3d_spirv_build_op_tr(builder, stream, SpvOpUndef, type_id); + return vkd3d_spirv_build_op_tr(builder, &builder->global_stream, SpvOpUndef, type_id); +} + +static uint32_t vkd3d_spirv_get_op_undef(struct vkd3d_spirv_builder *builder, uint32_t type_id) +{ + return vkd3d_spirv_build_once1(builder, SpvOpUndef, type_id, vkd3d_spirv_build_op_undef); }
static uint32_t vkd3d_spirv_build_op_access_chain(struct vkd3d_spirv_builder *builder, @@ -2872,7 +2876,7 @@ static uint32_t spirv_compiler_get_constant(struct spirv_compiler *compiler, break; default: FIXME("Unhandled component_type %#x.\n", component_type); - return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); + return vkd3d_spirv_get_op_undef(builder, type_id); }
if (component_count == 1) @@ -2901,7 +2905,7 @@ static uint32_t spirv_compiler_get_constant64(struct spirv_compiler *compiler, if (component_type != VKD3D_SHADER_COMPONENT_DOUBLE) { FIXME("Unhandled component_type %#x.\n", component_type); - return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); + return vkd3d_spirv_get_op_undef(builder, type_id); }
if (component_count == 1) @@ -3653,7 +3657,7 @@ static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, assert(reg->type == VKD3DSPR_UNDEF);
type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); - return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); + return vkd3d_spirv_get_op_undef(builder, type_id); }
static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, @@ -3796,7 +3800,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, if (!spirv_compiler_get_register_info(compiler, reg, &reg_info)) { type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); + return vkd3d_spirv_get_op_undef(builder, type_id); } assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); spirv_compiler_emit_dereference_register(compiler, reg, &reg_info);
From: Conor McCarthy cmccarthy@codeweavers.com
When DXBC contains DXIL code it uses ISG1 signatures. --- libs/vkd3d-shader/dxbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/dxbc.c b/libs/vkd3d-shader/dxbc.c index cedc3da4..52234872 100644 --- a/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d-shader/dxbc.c @@ -429,7 +429,7 @@ static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, { struct shader_signature *is = ctx;
- if (section->tag != TAG_ISGN) + if (section->tag != TAG_ISGN && section->tag != TAG_ISG1) return VKD3D_OK;
if (is->elements)
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 666d8b08..6c8d6ec5 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -296,6 +296,7 @@ struct sm6_parser size_t global_symbol_count;
struct vkd3d_shader_dst_param *output_params; + struct vkd3d_shader_dst_param *input_params;
struct sm6_function *functions; size_t function_count; @@ -2112,6 +2113,8 @@ static void sm6_parser_emit_signature(struct sm6_parser *sm6, const struct shade param = &ins->declaration.dst; }
+ /* TODO: set the interpolation mode when signatures are loaded from DXIL metadata. */ + ins->flags = (handler_idx == VKD3DSIH_DCL_INPUT_PS) ? VKD3DSIM_LINEAR_NOPERSPECTIVE : 0; *param = params[i]; } } @@ -2123,11 +2126,24 @@ static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const struc sm6->output_params); }
+static void sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) +{ + sm6_parser_init_signature(sm6, input_signature, VKD3DSPR_INPUT, sm6->input_params); +} + static void sm6_parser_emit_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) { sm6_parser_emit_signature(sm6, output_signature, VKD3DSIH_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT_SIV, sm6->output_params); }
+static void sm6_parser_emit_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) +{ + sm6_parser_emit_signature(sm6, input_signature, + (sm6->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT, + (sm6->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV, + sm6->input_params); +} + static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) { size_t i, count = sm6->function_count; @@ -2683,6 +2699,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t const char *source_name, struct vkd3d_shader_message_context *message_context) { const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; + const struct shader_signature *input_signature = &sm6->p.shader_desc.input_signature; const struct vkd3d_shader_location location = {.source_name = source_name}; uint32_t version_token, dxil_version, token_count, magic; unsigned int chunk_offset, chunk_size; @@ -2838,7 +2855,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t return ret; }
- if (!(sm6->output_params = shader_parser_get_dst_params(&sm6->p, output_signature->element_count))) + if (!(sm6->output_params = shader_parser_get_dst_params(&sm6->p, output_signature->element_count)) + || !(sm6->input_params = shader_parser_get_dst_params(&sm6->p, input_signature->element_count))) { ERR("Failed to allocate output parameters.\n"); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, @@ -2877,6 +2895,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t }
sm6_parser_init_output_signature(sm6, output_signature); + sm6_parser_init_input_signature(sm6, input_signature);
if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) { @@ -2889,13 +2908,14 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t return ret; }
- if (!sm6_parser_require_space(sm6, output_signature->element_count)) + if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count)) { vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory emitting shader signature declarations."); return VKD3D_ERROR_OUT_OF_MEMORY; } sm6_parser_emit_output_signature(sm6, output_signature); + sm6_parser_emit_input_signature(sm6, input_signature);
for (i = 0; i < sm6->function_count; ++i) {
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/dxil.c | 103 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+)
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 6c8d6ec5..a304bd97 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -141,6 +141,7 @@ enum bitcode_value_symtab_code
enum dx_intrinsic_opcode { + DX_LOAD_INPUT = 4, DX_STORE_OUTPUT = 5, };
@@ -305,6 +306,7 @@ struct sm6_parser size_t value_count; size_t value_capacity; size_t cur_max_value; + unsigned int ssa_next_id;
struct vkd3d_shader_parser p; }; @@ -1548,6 +1550,11 @@ static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *v return register_get_uint_value(&value->u.reg); }
+static inline unsigned int sm6_parser_alloc_ssa_id(struct sm6_parser *sm6) +{ + return sm6->ssa_next_id++; +} + static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_shader_instruction *ins, unsigned int count, struct sm6_parser *sm6) { @@ -1580,6 +1587,24 @@ static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_ return params; }
+static void register_init_with_id(struct vkd3d_shader_register *reg, + enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int index) +{ + shader_register_init(reg, reg_type, data_type, 1); + reg->idx[0].offset = index; +} + +static void register_init_ssa_vector(struct vkd3d_shader_register *reg, enum vkd3d_data_type data_type, + unsigned int component_count, struct sm6_parser *sm6) +{ + unsigned int id; + + id = sm6_parser_alloc_ssa_id(sm6); + register_init_with_id(reg, VKD3DSPR_SSA, data_type, id); + reg->u.dcl_data_type = data_type; + reg->immconst_type = component_count > 1 ? VKD3D_IMMCONST_VEC4 : VKD3D_IMMCONST_SCALAR; +} + static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) { if (type->class == TYPE_CLASS_INTEGER) @@ -1613,6 +1638,19 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type return VKD3D_DATA_UINT; }
+static inline void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const struct sm6_type *type, + struct sm6_parser *sm6) +{ + register_init_ssa_vector(reg, vkd3d_data_type_from_sm6_type(sm6_type_get_scalar_type(type, 0)), 1, sm6); +} + +static inline void dst_param_init(struct vkd3d_shader_dst_param *param) +{ + param->write_mask = VKD3DSP_WRITEMASK_0; + param->modifiers = 0; + param->shift = 0; +} + static inline void dst_param_init_scalar(struct vkd3d_shader_dst_param *param, unsigned int component_idx) { param->write_mask = 1u << component_idx; @@ -1620,12 +1658,25 @@ static inline void dst_param_init_scalar(struct vkd3d_shader_dst_param *param, u param->shift = 0; }
+static inline void dst_param_init_ssa_scalar(struct vkd3d_shader_dst_param *param, const struct sm6_type *type, + struct sm6_parser *sm6) +{ + dst_param_init(param); + register_init_ssa_scalar(&param->reg, type, sm6); +} + static inline void src_param_init(struct vkd3d_shader_src_param *param) { param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); param->modifiers = VKD3DSPSM_NONE; }
+static inline void src_param_init_scalar(struct vkd3d_shader_src_param *param, unsigned int component_idx) +{ + param->swizzle = vkd3d_shader_create_swizzle(component_idx, component_idx, component_idx, component_idx); + param->modifiers = VKD3DSPSM_NONE; +} + static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) { src_param_init(param); @@ -1654,6 +1705,22 @@ static void register_address_init(struct vkd3d_shader_register *reg, const struc } }
+static void instruction_dst_param_init_ssa_scalar_component(struct vkd3d_shader_instruction *ins, + unsigned int component_idx, struct sm6_parser *sm6) +{ + struct vkd3d_shader_dst_param *param = instruction_dst_params_alloc(ins, 1, sm6); + struct sm6_value *dst = sm6_parser_get_current_value(sm6); + + dst_param_init_ssa_scalar(param, dst->type, sm6); + param->write_mask = VKD3DSP_WRITEMASK_0 << component_idx; + dst->u.reg = param->reg; +} + +static inline void instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) +{ + return instruction_dst_param_init_ssa_scalar_component(ins, 0, sm6); +} + /* Recurse through the block tree while maintaining a current value count. The current * count is the sum of the global count plus all declarations within the current function. * Store into value_capacity the highest count seen. */ @@ -2166,6 +2233,38 @@ static struct sm6_block *sm6_block_create() return block; }
+static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, struct sm6_block *code_block, + enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_src_param *src_param; + const struct shader_signature *signature; + unsigned int row_index, column_index; + const struct signature_element *e; + + row_index = sm6_value_get_constant_uint(operands[0]); + column_index = sm6_value_get_constant_uint(operands[2]); + + shader_instruction_init(ins, VKD3DSIH_MOV); + + signature = &sm6->p.shader_desc.input_signature; + if (row_index >= signature->element_count) + { + WARN("Invalid row index %u.\n", row_index); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid input row index %u.", row_index); + return; + } + e = &signature->elements[row_index]; + + src_param = instruction_src_params_alloc(ins, 1, sm6); + src_param->reg = sm6->input_params[row_index].reg; + src_param_init_scalar(src_param, column_index); + if (e->register_count > 1) + register_address_init(&src_param->reg, operands[1], 0, sm6); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, struct sm6_block *code_block, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct vkd3d_shader_instruction *ins) { @@ -2235,6 +2334,7 @@ struct sm6_dx_opcode_info */ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = { + [DX_LOAD_INPUT ] = {'o', "ii8i", sm6_parser_emit_dx_load_input}, [DX_STORE_OUTPUT ] = {'v', "ii8o", sm6_parser_emit_dx_store_output}, };
@@ -2887,6 +2987,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t "Out of memory allocating DXIL value array."); return VKD3D_ERROR_OUT_OF_MEMORY; } + sm6->ssa_next_id = 1;
if ((ret = sm6_parser_globals_init(sm6)) < 0) { @@ -2917,6 +3018,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t sm6_parser_emit_output_signature(sm6, output_signature); sm6_parser_emit_input_signature(sm6, input_signature);
+ sm6->p.shader_desc.ssa_count = sm6->ssa_next_id; + for (i = 0; i < sm6->function_count; ++i) { if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6))
`shader_component_type_size()` needs to be higher up for the next series.
A general design question though: it seems that you could avoid introducing a new register type simply by creating "regular" temporary registers instead of SSA ones. An SSA register has the additional property that you cannot write it after the first time, essentially, but that doesn't prevent you from using a temporary and just writing it once. Would there be a fundamental problem with that approach? If not, then why did you decide to introduce a new type?
For the record, I have the same concern.
Both DXIL and SPIR-V use SSA, so using it in the IR is by far the simplest way to handle the values.
I don't think the second part of that statement necessarily follows from the first part.
Using temps would introduce the problem of selecting an unused temp, i.e. one whose value is no longer needed, and it becomes even more complex when dealing with `PHI` instructions.
Why is that?
Also, temps are written and read with `OpStore` and `OpLoad`, which SSA renders unnecessary. I'm not inclined to add a comment on this as I think the question won't arise when everything is upstream.
How valuable is that? If it's significant, we may want to consider converting TPF to SSA form as well in the SPIR-V backend.
It does become a bit of an issue with upstreaming this though; in principle upstreaming dxil support isn't affected much by the current freeze, but that's predicated on not requiring significant changes to the existing code. Making significant changes to the shared IR or SPIR-V backend would likely need to wait until after the 1.9 release.
On Mon Sep 4 14:01:18 2023 +0000, Henri Verbeet wrote:
A general design question though: it seems that you could avoid
introducing a new register type simply by creating "regular" temporary registers instead of SSA ones. An SSA register has the additional property that you cannot write it after the first time, essentially, but that doesn't prevent you from using a temporary and just writing it once. Would there be a fundamental problem with that approach? If not, then why did you decide to introduce a new type? For the record, I have the same concern.
Both DXIL and SPIR-V use SSA, so using it in the IR is by far the
simplest way to handle the values. I don't think the second part of that statement necessarily follows from the first part.
Using temps would introduce the problem of selecting an unused temp,
i.e. one whose value is no longer needed, and it becomes even more complex when dealing with `PHI` instructions. Why is that?
Also, temps are written and read with `OpStore` and `OpLoad`, which
SSA renders unnecessary. I'm not inclined to add a comment on this as I think the question won't arise when everything is upstream. How valuable is that? If it's significant, we may want to consider converting TPF to SSA form as well in the SPIR-V backend. It does become a bit of an issue with upstreaming this though; in principle upstreaming dxil support isn't affected much by the current freeze, but that's predicated on not requiring significant changes to the existing code. Making significant changes to the shared IR or SPIR-V backend would likely need to wait until after the 1.9 release.
On revisiting this I think PHI instructions should disappear if temps are handled correctly, and it would eliminate some complications in the structuriser. On the other hand, optimisation of code generation by Vulkan drivers may be compromised when temps are used instead of SSA. We would need to test it to know for sure, but if it's ok, there's no guarantee we won't encounter another driver later which has issues.
How valuable is that? If it's significant, we may want to consider converting TPF to SSA form as well in the SPIR-V backend.
I have wondered that too. TPF's structured control flow probably makes it not especially complicated to do, even though it will need PHI instructions. If `OpStore` and `OpLoad` are an issue, we should see performance improvements in, e.g. SotTR with this change. I'll try that unless anyone has a better idea.
How valuable is that? If it's significant, we may want to consider converting TPF to SSA form as well in the SPIR-V backend.
It seems to me quite likely that translating sm4 to SSA would be more expensive than passing it to the driver (and letting the driver translate it to SSA internally, probably), but translating sm6 to temps would be more expensive than passing it through to the driver. I don't think we have any special knowledge that would let us convert sm4 to SSA more efficiently. Granted, maybe those differences aren't significant, but I hesitate to just dismiss them out of hand.
Also: if we are going to translate HLSL to vsir, and then perform optimizations, or raising passes, on that vsir, then it is distinctly easier to have those in SSA form, then convert the SSA form to registers once the actual instruction sequence has been finalized.
I haven't tried it, nor actually read Conor's code, but it doesn't seem likely that adding SSA is going to complexify the code that much. [Tracking def-use chains would, but I don't think we need that...]
How valuable is that? If it's significant, we may want to consider converting TPF to SSA form as well in the SPIR-V backend.
It seems to me quite likely that translating sm4 to SSA would be more expensive than passing it to the driver (and letting the driver translate it to SSA internally, probably), but translating sm6 to temps would be more expensive than passing it through to the driver.
To be clear, my expectation would be that these redundant OpStore and OpLoad instructions largely don't matter. I could very well be wrong about that, of course. And perhaps there are other reasons to avoid using temps here, but I haven't seen those.
So what I imagine we want to do, is to start with naively translating DXIL SSA values to vsir temp assignments. That should unblock upstreaming plenty of bits. We don't have control flow yet, we don't have phi instructions yet either, and it's not clear to me that there's a fundamental reason e.g. vsir phi instructions couldn't operate on vsir temps anyway. I think there are a few scenarios that could happen from there:
- It's fine.
- We introduce vsir SSA registers for some other reason, e.g. because the HLSL compiler wants to do optimisations in that form; DXIL/SPIR-V could then just take advantage of that.
- We encounter some issue that can't reasonably be resolved with vsir temps. We discuss it when we get there, instead of now in the abstract.
- We do some benchmarking/profiling and find that there are either advantages in compile time or run time to introducing a separate register type. We discuss it when we get there, with the hard data to back it up, instead of speculating about it now.
And perhaps this is also a good time to reflect on the broader upstreaming strategy. On a very broad level, the sequence that I would have hoped/expected to see would be something along these lines:
- Some shader_runner infrastructure to compile and run DXIL shaders.
- The most basic, straightforward implementation of the bits required to make a fairly minimal test pass. E.g., tests/hlsl/swizzles.shader_test.
- Basic, straightforward implementations of features required to make the rest of the tests pass.
- Features required by applications, but not covered by the tests. Writing tests for these as they're implemented.
- Optimisations and other complications.
Also, I'd like to stress this because it has come up before for other MRs, please don't pre-emptively add complications for issues that will only come up later; whether that's in a later patch in the same series or 400 patches later in the branch you're upstreaming from. Reviewers generally can't or don't want to look that far ahead, and it just ends up slowing the entire process down. Patches should make sense in isolation, at the point in time where they're introduced.
Once branched shaders are supported, assigning temps will require parsing the code graph to check all possible paths for temp usage. DXIL code graphs are spaghetti, and this is not a problem I am interested in solving.
We encounter some issue that can't reasonably be resolved with vsir temps.
vsir temps can't reasonably be assigned without a lot of work which doesn't need to be done.
SPIR-V converted from DXIL is already somewhat bloated because it does nearly everything with scalars. Adding all the access chains, loads, stores and some bitcasts will make the shaders much larger.
I see no cause for concern with `VKD3DSPR_SSA`. It's a new register type not used in TPF, the new code is not executed for TPF, and aside from a few checks for the new type it has no effect on existing code.