Tracing of very simple SM 6 shaders is possible with these patches; for example, a pixel shader which returns a vector of constants.
From: Conor McCarthy <cmccarthy@codeweavers.com>
Makes the introduction of DXIL tracing neater. --- libs/vkd3d-shader/trace.c | 119 +++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 53 deletions(-)
diff --git a/libs/vkd3d-shader/trace.c b/libs/vkd3d-shader/trace.c index 6c30edc9..dc711f52 100644 --- a/libs/vkd3d-shader/trace.c +++ b/libs/vkd3d-shader/trace.c @@ -1851,6 +1851,70 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, shader_addline(buffer, "\n"); }
+static enum vkd3d_result vkd3d_dxbc_dump_instructions(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_d3d_asm_compiler *compiler) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + enum vkd3d_result result = VKD3D_OK; + unsigned int indent, i; + const char *indent_str; + + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT) + indent_str = " "; + else + indent_str = ""; + + indent = 0; + while (!vkd3d_shader_parser_is_end(parser)) + { + struct vkd3d_shader_instruction ins; + + vkd3d_shader_parser_read_instruction(parser, &ins); + if (ins.handler_idx == VKD3DSIH_INVALID) + { + WARN("Skipping unrecognized instruction.\n"); + vkd3d_string_buffer_printf(buffer, "<unrecognized instruction>\n"); + result = VKD3D_ERROR; + continue; + } + + switch (ins.handler_idx) + { + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: + case VKD3DSIH_ENDLOOP: + case VKD3DSIH_ENDSWITCH: + --indent; + break; + + default: + break; + } + + for (i = 0; i < indent; ++i) + { + vkd3d_string_buffer_printf(buffer, "%s", indent_str); + } + + shader_dump_instruction(compiler, &ins); + + switch (ins.handler_idx) + { + case VKD3DSIH_ELSE: + case VKD3DSIH_IF: + case VKD3DSIH_LOOP: + case VKD3DSIH_SWITCH: + ++indent; + break; + + default: + break; + } + } + + return result; +} + enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out) { @@ -1859,8 +1923,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, struct vkd3d_d3d_asm_compiler compiler; enum vkd3d_result result = VKD3D_OK; struct vkd3d_string_buffer *buffer; - unsigned int indent, i; - const char *indent_str; + unsigned int i; void *code;
static const struct vkd3d_d3d_asm_colours no_colours = @@ -1905,10 +1968,6 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, compiler.colours = colours; else compiler.colours = no_colours; - if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT) - indent_str = " "; - else - indent_str = "";
buffer = &compiler.buffer; vkd3d_string_buffer_init(buffer); @@ -1919,54 +1978,8 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, shader_get_type_prefix(shader_version->type), shader_version->major, shader_version->minor, compiler.colours.reset);
- indent = 0; vkd3d_shader_parser_reset(parser); - while (!vkd3d_shader_parser_is_end(parser)) - { - struct vkd3d_shader_instruction ins; - - vkd3d_shader_parser_read_instruction(parser, &ins); - if (ins.handler_idx == VKD3DSIH_INVALID) - { - WARN("Skipping unrecognized instruction.\n"); - vkd3d_string_buffer_printf(buffer, "<unrecognized instruction>\n"); - result = VKD3D_ERROR; - continue; - } - - switch (ins.handler_idx) - { - case VKD3DSIH_ELSE: - case VKD3DSIH_ENDIF: - case VKD3DSIH_ENDLOOP: - case VKD3DSIH_ENDSWITCH: - --indent; - break; - - default: - break; - } - - for (i = 0; i < indent; ++i) - { - vkd3d_string_buffer_printf(buffer, "%s", indent_str); - } - - shader_dump_instruction(&compiler, &ins); - - switch (ins.handler_idx) - { - case VKD3DSIH_ELSE: - case VKD3DSIH_IF: - case VKD3DSIH_LOOP: - case VKD3DSIH_SWITCH: - ++indent; - break; - - default: - break; - } - } + result = vkd3d_dxbc_dump_instructions(parser, formatting, &compiler);
if (parser->failed) result = VKD3D_ERROR_INVALID_SHADER;
From: Conor McCarthy <cmccarthy@codeweavers.com>
DXIL is emitted by a fork of LLVM and is therefore stored in LLVM's compressed bitcode format. Only a subset of this format is required for loading DXIL.
Trace output is a subset of the output from 'dxc -dumpbin' except it includes function and constant definitions. Metadata is not yet loaded or traced. --- Makefile.am | 2 + libs/vkd3d-shader/dxbc.c | 17 +- libs/vkd3d-shader/dxil.c | 2515 ++++++++++++++++++++++ libs/vkd3d-shader/sm6.h | 682 ++++++ libs/vkd3d-shader/trace.c | 395 +++- libs/vkd3d-shader/vkd3d_shader_main.c | 7 + libs/vkd3d-shader/vkd3d_shader_private.h | 365 ++++ 7 files changed, 3978 insertions(+), 5 deletions(-) create mode 100644 libs/vkd3d-shader/dxil.c create mode 100644 libs/vkd3d-shader/sm6.h
diff --git a/Makefile.am b/Makefile.am index 1340be10..843fb4f8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -234,6 +234,7 @@ libvkd3d_shader_la_SOURCES = \ libs/vkd3d-shader/checksum.c \ libs/vkd3d-shader/d3dbc.c \ libs/vkd3d-shader/dxbc.c \ + libs/vkd3d-shader/dxil.c \ libs/vkd3d-shader/glsl.c \ libs/vkd3d-shader/hlsl.c \ libs/vkd3d-shader/hlsl.h \ @@ -243,6 +244,7 @@ libvkd3d_shader_la_SOURCES = \ libs/vkd3d-shader/hlsl_sm4.c \ libs/vkd3d-shader/preproc.h \ libs/vkd3d-shader/sm4.h \ + libs/vkd3d-shader/sm6.h \ libs/vkd3d-shader/spirv.c \ libs/vkd3d-shader/trace.c \ libs/vkd3d-shader/vkd3d_shader.map \ diff --git a/libs/vkd3d-shader/dxbc.c b/libs/vkd3d-shader/dxbc.c index 17be2306..69c3ec89 100644 --- a/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d-shader/dxbc.c @@ -1998,6 +1998,9 @@ static int shdr_handler(const char *data, DWORD data_size, DWORD tag, void *cont return ret; break;
+ case TAG_DXIL: + desc->is_dxil = true; + /* fall through */ case TAG_SHDR: case TAG_SHEX: if (desc->byte_code) @@ -2010,10 +2013,6 @@ static int shdr_handler(const char *data, DWORD data_size, DWORD tag, void *cont TRACE("Skipping AON9 shader code chunk.\n"); break;
- case TAG_DXIL: - FIXME("Skipping DXIL shader model 6+ code chunk.\n"); - break; - default: TRACE("Skipping chunk %#x.\n", tag); break; @@ -2075,6 +2074,16 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi return ret; }
+ if (shader_desc->is_dxil) + { + if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, shader_desc, parser)) < 0) + { + free_shader_desc(shader_desc); + vkd3d_free(sm4); + } + return ret; + } + if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, compile_info->source_name, &shader_desc->output_signature, message_context)) { diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c new file mode 100644 index 00000000..1f4d28b1 --- /dev/null +++ b/libs/vkd3d-shader/dxil.c @@ -0,0 +1,2515 @@ +/* + * Copyright 2022 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" +#include "sm6.h" + +struct vkd3d_shader_sm6_record +{ + unsigned int code; + unsigned int operand_count; + uint64_t operands[]; +}; + +struct vkd3d_shader_sm6_block +{ + const struct vkd3d_shader_sm6_block *parent; + enum dxil_bc_block_id id; + unsigned int abbrev_len; + unsigned int start; + unsigned int length; + unsigned int level; + + /* The abbrev, block and record structs are not relocatable. 
*/ + struct vkd3d_shader_sm6_abbrev **abbrevs; + size_t abbrev_capacity; + size_t abbrev_count; + unsigned int blockinfo_bid; + + struct vkd3d_shader_sm6_block **child_blocks; + size_t child_block_capacity; + size_t child_block_count; + + struct vkd3d_shader_sm6_record **records; + size_t record_capacity; + size_t record_count; +}; + +struct vkd3d_shader_sm6_symbol +{ + unsigned int id; + const char *name; +}; + +struct vkd3d_shader_sm6_value +{ + const struct vkd3d_shader_sm6_type *type; + const struct vkd3d_shader_sm6_instruction *ins; +}; + +struct vkd3d_shader_sm6_parser +{ + const uint32_t *start, *end; + + struct vkd3d_shader_sm6_block root_block; + struct vkd3d_shader_sm6_block *current_block; + + struct vkd3d_shader_sm6_global_abbrev **abbrevs; + size_t abbrev_capacity; + size_t abbrev_count; + + struct vkd3d_shader_sm6_type *types; + unsigned int type_count; + + struct vkd3d_shader_sm6_parameter_group *attr_groups; + unsigned int attr_group_count; + + struct vkd3d_shader_sm6_parameter_attribute_entry **attributes; + unsigned int attribute_count; + + struct vkd3d_shader_sm6_symbol *global_symbols; + unsigned int global_symbol_count; + + struct vkd3d_shader_sm6_value *values; + unsigned int value_count; + unsigned int value_capacity; + unsigned int cur_max_value; + + struct vkd3d_shader_sm6_instruction *global_instructions; + unsigned int global_instruction_count; + struct vkd3d_shader_sm6_function *functions; + unsigned int function_def_count; + + struct vkd3d_shader_parser p; + + uint32_t byte_code[]; +}; + +struct vkd3d_shader_sm6_abbrev_operand +{ + uint64_t context; + bool (*read_operand)(struct vkd3d_shader_parser *parser, uint64_t context, uint64_t *operand); +}; + +struct vkd3d_shader_sm6_abbrev +{ + unsigned int count; + bool is_array; + struct vkd3d_shader_sm6_abbrev_operand operands[]; +}; + +struct vkd3d_shader_sm6_global_abbrev +{ + unsigned int block_id; + struct vkd3d_shader_sm6_abbrev abbrev; +}; + +struct vkd3d_sm6_call_opcode_info +{ + enum 
vkd3d_shader_sm6_call_op op; + const char ret_type; + const char *operand_info; +}; + +static struct vkd3d_shader_sm6_parser *vkd3d_shader_sm6_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct vkd3d_shader_sm6_parser, p); +} + +static void shader_sm6_reset(struct vkd3d_shader_parser *parser) +{ + parser->failed = false; +} + +static bool shader_sm6_is_end(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm6_parser *sm6 = vkd3d_shader_sm6_parser(parser); + + return parser->ptr == sm6->end; +} + +static inline unsigned int shader_sm6_read_uint32(struct vkd3d_shader_parser *parser) +{ + if (shader_sm6_is_end(parser)) + { + parser->failed = true; + return 0; + } + return *parser->ptr++; +} + +static inline unsigned int shader_sm6_read_bits(struct vkd3d_shader_parser *parser, unsigned int length) +{ + unsigned int l, prev_len = 0; + uint32_t bits; + + if (!length) + return 0; + + if (shader_sm6_is_end(parser)) + { + parser->failed = true; + return 0; + } + + bits = *parser->ptr >> parser->bitpos; + l = 32 - parser->bitpos; + /* The result of uint32 >> 32 is undefined, so bitpos must be <= 31 */ + if (l <= length) + { + ++parser->ptr; + if (shader_sm6_is_end(parser) && l < length) + { + parser->failed = true; + return bits; + } + parser->bitpos = 0; + bits |= *parser->ptr << l; + prev_len = l; + } + parser->bitpos += length - prev_len; + + return bits & ((1 << length) - 1); +} + +static uint64_t shader_sm6_read_vbr(struct vkd3d_shader_parser *parser, unsigned int length) +{ + unsigned int bits, flag, mask, shift = 0; + uint64_t result = 0; + + if (!length) + return 0; + + if (shader_sm6_is_end(parser)) + { + parser->failed = true; + return 0; + } + + flag = 1 << (length - 1); + mask = flag - 1; + do + { + bits = shader_sm6_read_bits(parser, length); + result |= (uint64_t)(bits & mask) << shift; + shift += length - 1; + } while ((bits & flag) && !parser->failed); + + return result; +} + +static inline void 
shader_sm6_align_32(struct vkd3d_shader_parser *parser) +{ + if (!parser->bitpos) + return; + + if (shader_sm6_is_end(parser)) + { + parser->failed = true; + return; + } + + ++parser->ptr; + parser->bitpos = 0; +} + +static bool shader_sm6_record_handle_blockinfo(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_sm6_record *record) +{ + struct vkd3d_shader_sm6_parser *sm6 = vkd3d_shader_sm6_parser(parser); + struct vkd3d_shader_sm6_block *block = sm6->current_block; + + switch (record->code) + { + case SETBID: + if (!record->operand_count) + return false; + if (record->operands[0] > ~0u) + WARN("Truncating block id %"PRIu64".\n", record->operands[0]); + block->blockinfo_bid = record->operands[0]; + break; + case BLOCKNAME: + case SETRECORDNAME: + break; + default: + FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); + break; + } + + return true; +} + +static bool shader_sm6_block_add_record(struct vkd3d_shader_sm6_block *block, struct vkd3d_shader_sm6_record *record) +{ + unsigned int reserve; + + switch (block->id) + { + case CONSTANTS_BLOCK: reserve = 32; break; + case FUNCTION_BLOCK: reserve = 128; break; + case METADATA_BLOCK: reserve = 32; break; + case TYPE_BLOCK: reserve = 32; break; + default: reserve = 8; break; + } + reserve = max(reserve, block->record_count + 1); + if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) + { + ERR("Failed to allocate %u records.\n", reserve); + return false; + } + + block->records[block->record_count++] = record; + + return true; +} + +static bool shader_sm6_read_unabbrev_record(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm6_parser *sm6 = vkd3d_shader_sm6_parser(parser); + struct vkd3d_shader_sm6_block *block = sm6->current_block; + struct vkd3d_shader_sm6_record *record; + unsigned int code, count, i; + + code = shader_sm6_read_vbr(parser, 6); + + count = shader_sm6_read_vbr(parser, 6); + if (!(record = 
vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count); + return false; + } + + record->code = code; + record->operand_count = count; + + for (i = 0; i < count; ++i) + record->operands[i] = shader_sm6_read_vbr(parser, 6); + + if (parser->failed) + { + vkd3d_free(record); + return false; + } + + if (!shader_sm6_block_add_record(block, record)) + return false; + + if (block->id == BLOCKINFO_BLOCK) + return shader_sm6_record_handle_blockinfo(parser, record); + + return true; +} + +static bool shader_sm6_read_literal_operand(struct vkd3d_shader_parser *parser, uint64_t context, + uint64_t *op) +{ + *op = context; + return !parser->failed; +} + +static bool shader_sm6_read_fixed_operand(struct vkd3d_shader_parser *parser, uint64_t context, + uint64_t *op) +{ + *op = shader_sm6_read_bits(parser, context); + return !parser->failed; +} + +static bool shader_sm6_read_vbr_operand(struct vkd3d_shader_parser *parser, uint64_t context, + uint64_t *op) +{ + *op = shader_sm6_read_vbr(parser, context); + return !parser->failed; +} + +static bool shader_sm6_read_char6_operand(struct vkd3d_shader_parser *parser, uint64_t context, + uint64_t *op) +{ + *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[shader_sm6_read_bits(parser, 6)]; + return !parser->failed; +} + +static bool shader_sm6_read_blob_operand(struct vkd3d_shader_parser *parser, uint64_t context, + uint64_t *op) +{ + int count = shader_sm6_read_vbr(parser, 6); + shader_sm6_align_32(parser); + for (; count > 0; count -= 4) + shader_sm6_read_uint32(parser); + FIXME("Unhandled blob operand.\n"); + return false; +} + +static bool shader_sm6_abbrev_init(struct vkd3d_shader_sm6_abbrev *abbrev, + unsigned int count, struct vkd3d_shader_parser *parser) +{ + enum dxil_bc_abbrev_type prev_type, type; + unsigned int i; + + abbrev->is_array = false; + + for (i = 0, prev_type = 0; i < count && !parser->failed; ++i) + { + if 
(shader_sm6_read_bits(parser, 1)) + { + if (prev_type == ABBREV_ARRAY) + { + FIXME("Unexpected literal abbreviation after array.\n"); + return false; + } + abbrev->operands[i].context = shader_sm6_read_vbr(parser, 8); + abbrev->operands[i].read_operand = shader_sm6_read_literal_operand; + continue; + } + + switch (type = shader_sm6_read_bits(parser, 3)) + { + case ABBREV_FIXED: + case ABBREV_VBR: + abbrev->operands[i].context = shader_sm6_read_vbr(parser, 5); + abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? shader_sm6_read_fixed_operand + : shader_sm6_read_vbr_operand; + break; + + case ABBREV_ARRAY: + if (prev_type == ABBREV_ARRAY || i != count - 2) + { + FIXME("Unexpected array abbreviation.\n"); + return false; + } + abbrev->is_array = true; + --i; + break; + + case ABBREV_CHAR: + abbrev->operands[i].read_operand = shader_sm6_read_char6_operand; + break; + + case ABBREV_BLOB: + if (prev_type == ABBREV_ARRAY) + { + FIXME("Unexpected blob abbreviation after array.\n"); + return false; + } + abbrev->operands[i].read_operand = shader_sm6_read_blob_operand; + break; + } + + count -= (prev_type == ABBREV_ARRAY); + prev_type = type; + } + + abbrev->count = count; + + return !parser->failed; +} + +static bool shader_sm6_add_global_abbrev(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm6_parser *sm6 = vkd3d_shader_sm6_parser(parser); + struct vkd3d_shader_sm6_block *block = sm6->current_block; + struct vkd3d_shader_sm6_global_abbrev *global_abbrev; + unsigned int count = shader_sm6_read_vbr(parser, 5); + + assert(block->id == 0); + + if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) + || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) + { + ERR("Failed to allocate global abbreviation.\n"); + return false; + } + + if (!shader_sm6_abbrev_init(&global_abbrev->abbrev, count, parser)) + return false; + + 
global_abbrev->block_id = block->blockinfo_bid; + + sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; + + return true; +} + +static bool shader_sm6_add_block_abbrev(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm6_parser *sm6 = vkd3d_shader_sm6_parser(parser); + struct vkd3d_shader_sm6_block *block = sm6->current_block; + struct vkd3d_shader_sm6_abbrev *abbrev; + unsigned int count; + + if (block->id == BLOCKINFO_BLOCK) + return shader_sm6_add_global_abbrev(parser); + + count = shader_sm6_read_vbr(parser, 5); + if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) + || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) + { + ERR("Failed to allocate block abbreviation.\n"); + return false; + } + + if (!shader_sm6_abbrev_init(abbrev, count, parser)) + return false; + + block->abbrevs[block->abbrev_count++] = abbrev; + + return true; +} + +static bool shader_sm6_read_abbrev_record(struct vkd3d_shader_parser *parser, unsigned int abbrev_id) +{ + struct vkd3d_shader_sm6_parser *sm6 = vkd3d_shader_sm6_parser(parser); + struct vkd3d_shader_sm6_block *block = sm6->current_block; + struct vkd3d_shader_sm6_record *temp, *record; + struct vkd3d_shader_sm6_abbrev *abbrev; + unsigned int i, count, array_len; + uint64_t code; + + if (abbrev_id >= block->abbrev_count) + { + FIXME("Invalid abbreviation id %u.\n", abbrev_id); + return false; + } + + abbrev = block->abbrevs[abbrev_id]; + if (!(count = abbrev->count)) + return true; + if (count == 1 && abbrev->is_array) + return false; + + /* First operand is the record code. The array is included in the count, but will be done separately. 
*/ + count -= abbrev->is_array + 1; + if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count); + return false; + } + + if (!abbrev->operands[0].read_operand(parser, abbrev->operands[0].context, &code)) + return false; + if (code > ~0u) + FIXME("Invalid 64-bit record code %#"PRIx64".\n", code); + record->code = code; + + for (i = 0; i < count; ++i) + if (!abbrev->operands[i + 1].read_operand(parser, abbrev->operands[i + 1].context, &record->operands[i])) + return false; + record->operand_count = count; + + /* An array can occur only as the last operand. */ + if (abbrev->is_array) + { + array_len = shader_sm6_read_vbr(parser, 6); + if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count + array_len); + vkd3d_free(record); + return false; + } + record = temp; + + for (i = 0; i < array_len; ++i) + { + if (!abbrev->operands[count + 1].read_operand(parser, abbrev->operands[count + 1].context, + &record->operands[count + i])) + { + vkd3d_free(record); + return false; + } + } + record->operand_count += array_len; + } + + if (!shader_sm6_block_add_record(block, record)) + return false; + + if (block->id == BLOCKINFO_BLOCK) + return shader_sm6_record_handle_blockinfo(parser, record); + + return true; +} + +static bool shader_sm6_block_init(struct vkd3d_shader_sm6_block *block, const struct vkd3d_shader_sm6_block *parent, + struct vkd3d_shader_parser *parser); + +static bool shader_sm6_block_read(struct vkd3d_shader_sm6_block *parent, struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm6_parser *sm6 = vkd3d_shader_sm6_parser(parser); + unsigned int reserve = (parent->id == MODULE_BLOCK) ? 
12 : 2; + struct vkd3d_shader_sm6_block *block; + + sm6->current_block = parent; + + do + { + unsigned int abbrev_id = shader_sm6_read_bits(parser, parent->abbrev_len); + + switch (abbrev_id) + { + case END_BLOCK: + shader_sm6_align_32(parser); + return true; + + case ENTER_SUBBLOCK: + if (!(block = vkd3d_malloc(sizeof(*block))) || !vkd3d_array_reserve((void **)&parent->child_blocks, + &parent->child_block_capacity, max(reserve, parent->child_block_count + 1), + sizeof(*parent->child_blocks))) + { + ERR("Failed to allocate block.\n"); + return false; + } + + if (!shader_sm6_block_init(block, parent, parser)) + return false; + + parent->child_blocks[parent->child_block_count++] = block; + sm6->current_block = parent; + break; + + case DEFINE_ABBREV: + if (!shader_sm6_add_block_abbrev(parser)) + return false; + break; + + case UNABBREV_RECORD: + if (!shader_sm6_read_unabbrev_record(parser)) + { + FIXME("Failed to read unabbreviated record.\n"); + return false; + } + break; + + default: + if (!shader_sm6_read_abbrev_record(parser, abbrev_id - 4)) + { + FIXME("Failed to read abbreviated record.\n"); + return false; + } + break; + } + } while (!parser->failed); + + return false; +} + +static inline unsigned int shader_sm6_compute_global_abbrev_count_for_block_id(struct vkd3d_shader_sm6_parser *sm6, + unsigned int block_id) +{ + unsigned int i, count; + + for (i = 0, count = 0; i < sm6->abbrev_count; ++i) + count += sm6->abbrevs[i]->block_id == block_id; + + return count; +} + +static bool shader_sm6_block_init(struct vkd3d_shader_sm6_block *block, const struct vkd3d_shader_sm6_block *parent, + struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm6_parser *sm6 = vkd3d_shader_sm6_parser(parser); + unsigned int i, abbrev_count = 0; + bool ret; + + block->parent = parent; + block->level = parent ? 
parent->level + 1 : 0; + block->id = shader_sm6_read_vbr(parser, 8); + block->abbrev_len = shader_sm6_read_vbr(parser, 4); + shader_sm6_align_32(parser); + block->length = shader_sm6_read_uint32(parser); + block->start = parser->ptr - sm6->start; + block->abbrevs = NULL; + block->abbrev_capacity = 0; + + if (parser->failed) + return false; + + if ((block->abbrev_count = shader_sm6_compute_global_abbrev_count_for_block_id(sm6, block->id))) + { + if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, + block->abbrev_count, sizeof(*block->abbrevs))) + { + ERR("Failed to allocate block abbreviations.\n"); + return false; + } + + for (i = 0; i < sm6->abbrev_count; ++i) + if (sm6->abbrevs[i]->block_id == block->id) + block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; + + assert(abbrev_count == block->abbrev_count); + } + + block->child_blocks = NULL; + block->child_block_capacity = 0; + block->child_block_count = 0; + block->records = NULL; + block->record_capacity = 0; + block->record_count = 0; + + ret = shader_sm6_block_read(block, parser); + + for (i = abbrev_count; i < block->abbrev_count; ++i) + vkd3d_free(block->abbrevs[i]); + vkd3d_free(block->abbrevs); + block->abbrevs = NULL; + block->abbrev_count = 0; + + return ret; +} + +static char *shader_sm6_record_to_string(const struct vkd3d_shader_sm6_record *record, unsigned int first) +{ + unsigned int i; + char *str; + + if (!(str = vkd3d_malloc(record->operand_count + 1))) + return NULL; + + for (i = first; i < record->operand_count; ++i) + str[i - first] = record->operands[i]; + str[i - first] = 0; + + return str; +} + +static unsigned int shader_sm6_block_module_decl_count(const struct vkd3d_shader_sm6_block *block) +{ + unsigned int i, count; + + for (i = 0, count = 0; i < block->record_count; ++i) + count += block->records[i]->code == MODULE_CODE_FUNCTION || block->records[i]->code == MODULE_CODE_GLOBALVAR; + return count; +} + +static unsigned int 
shader_sm6_block_constants_count(const struct vkd3d_shader_sm6_block *block) +{ + unsigned int i, count; + + for (i = 0, count = 0; i < block->record_count; ++i) + count += block->records[i]->code != CST_CODE_SETTYPE; + return count; +} + +static const struct vkd3d_shader_sm6_block *shader_sm6_get_level_one_block(const struct vkd3d_shader_sm6_parser *sm6, + enum dxil_bc_block_id id, bool *is_unique) +{ + const struct vkd3d_shader_sm6_block *block, *found = NULL; + unsigned int i; + + for (i = 0, *is_unique = true; i < sm6->root_block.child_block_count; ++i) + { + block = sm6->root_block.child_blocks[i]; + if (block->id != id) + continue; + + if (!found) + found = block; + else + *is_unique = false; + } + + return found; +} + +static bool shader_sm6_type_table_init(struct vkd3d_shader_sm6_parser *sm6) +{ + const struct vkd3d_shader_sm6_record *record; + const struct vkd3d_shader_sm6_block *block; + unsigned int i, j, count, type_count; + struct vkd3d_shader_sm6_type *type; + char *struct_name = NULL; + uint64_t type_id; + bool is_unique; + + if (!(block = shader_sm6_get_level_one_block(sm6, TYPE_BLOCK, &is_unique))) + { + WARN("No type definitions found.\n"); + return true; + } + if (!is_unique) + FIXME("Ignoring extra type table(s).\n"); + + type_count = 0; + for (i = 0; i < block->record_count; ++i) + type_count += block->records[i]->code != TYPE_CODE_NUMENTRY && block->records[i]->code != TYPE_CODE_STRUCT_NAME; + + /* Types are not relocatable. 
*/ + if (!(sm6->types = vkd3d_malloc(type_count * sizeof(*sm6->types)))) + { + ERR("Failed to allocate type array.\n"); + return false; + } + + for (i = 0; i < block->record_count; ++i) + { + record = block->records[i]; + type = &sm6->types[sm6->type_count]; + type->index = sm6->type_count; + memset(&type->u, 0, sizeof(type->u)); + + switch (record->code) + { + case TYPE_CODE_ARRAY: + case TYPE_CODE_VECTOR: + if (record->operand_count < 2) + { + FIXME("Missing operands for array/vector type %u.\n", type->index); + return false; + } + + type->type = record->code == TYPE_CODE_ARRAY ? VKD3D_SM6_ARRAY : VKD3D_SM6_VECTOR; + + if (!(type->u.array.count = record->operands[0])) + { + FIXME("Unsupported array size 0 for type %u.\n", type->index); + return false; + } + + if ((type_id = record->operands[1]) >= type_count) + { + FIXME("Invalid contained type id %"PRIu64" for type %u.\n", type_id, type->index); + return false; + } + type->u.array.elem_type = &sm6->types[type_id]; + break; + + case TYPE_CODE_DOUBLE: + type->type = VKD3D_SM6_FLOAT; + type->u.width = 64; + break; + + case TYPE_CODE_FLOAT: + type->type = VKD3D_SM6_FLOAT; + type->u.width = 32; + break; + + case TYPE_CODE_FUNCTION: + if (record->operand_count < 2) + { + FIXME("Missing operands for function type %u.\n", type->index); + return false; + } + if (record->operands[0]) + FIXME("Unhandled vararg function type %u.\n", type->index); + + count = record->operand_count - 2; + if (!(type->u.function = vkd3d_malloc(sizeof(*type->u.function) + count * sizeof(type->u.function->param_types[0])))) + { + ERR("Failed to allocate function type.\n"); + return false; + } + + type->type = VKD3D_SM6_FUNCTION; + + if ((type_id = record->operands[1]) >= type_count) + { + FIXME("Invalid return type id %"PRIu64" for type %u.\n", type_id, type->index); + return false; + } + type->u.function->ret_type = &sm6->types[type_id]; + + type->u.function->param_count = count; + for (j = 0; j < count; ++j) + { + if ((type_id = 
record->operands[j + 2]) >= type_count) + { + FIXME("Invalid parameter type id %"PRIu64" for type %u.\n", type_id, type->index); + return false; + } + type->u.function->param_types[j] = &sm6->types[type_id]; + } + break; + + case TYPE_CODE_HALF: + type->type = VKD3D_SM6_FLOAT; + type->u.width = 16; + break; + + case TYPE_CODE_INTEGER: + { + uint64_t width; + + if (!record->operand_count) + { + FIXME("Missing integer width for type %u.\n", type->index); + return false; + } + + type->type = VKD3D_SM6_INTEGER; + + width = record->operands[0]; + if (width != 1 && (width > 64 || width < 8 || vkd3d_popcount(width) != 1)) + { + FIXME("Invalid integer width %"PRIu64" for type %u.\n", width, type->index); + return false; + } + type->u.width = width; + break; + } + + case TYPE_CODE_LABEL: + type->type = VKD3D_SM6_LABEL; + break; + + case TYPE_CODE_METADATA: + type->type = VKD3D_SM6_METADATA; + break; + + case TYPE_CODE_NUMENTRY: + continue; + + case TYPE_CODE_POINTER: + if (!record->operand_count) + { + FIXME("Missing pointee type for pointer type %u.\n", type->index); + return false; + } + + type->type = VKD3D_SM6_POINTER; + + if ((type_id = record->operands[0]) >= type_count) + { + FIXME("Invalid pointee type id %"PRIu64" for type %u.\n", type_id, type->index); + return false; + } + type->u.pointer.type = &sm6->types[type_id]; + type->u.pointer.addr_space = (record->operand_count > 1) ? 
record->operands[1] : 0; + break; + + case TYPE_CODE_STRUCT_ANON: + case TYPE_CODE_STRUCT_NAMED: + if (record->operand_count < 2) + { + FIXME("Missing operands for struct type %u.\n", type->index); + return false; + } + + type->type = VKD3D_SM6_STRUCT; + + count = record->operand_count - 1; + if (!(type->u.struc = vkd3d_malloc(sizeof(*type->u.struc) + count * sizeof(type->u.struc->elem_types[0])))) + { + ERR("Failed to allocate struct type.\n"); + return false; + } + + type->u.struc->is_packed = !!record->operands[0]; + type->u.struc->elem_count = count; + + for (j = 0; j < count; ++j) + { + if ((type_id = record->operands[j + 1]) >= type_count) + { + FIXME("Invalid contained type id %"PRIu64" for type %u.\n", type_id, type->index); + return false; + } + type->u.struc->elem_types[j] = &sm6->types[type_id]; + } + + if (record->code == TYPE_CODE_STRUCT_ANON) + { + type->u.struc->name = NULL; + break; + } + + if (!struct_name) + { + FIXME("Missing struct name before struct type %u.\n", type->index); + return false; + } + type->u.struc->name = struct_name; + struct_name = NULL; + break; + + case TYPE_CODE_STRUCT_NAME: + if (!(struct_name = shader_sm6_record_to_string(record, 0))) + { + ERR("Failed to allocate struct name.\n"); + return false; + } + if (!struct_name[0]) + WARN("Struct name is empty for type %u.\n", type->index); + continue; + + case TYPE_CODE_VOID: + type->type = VKD3D_SM6_VOID; + break; + + default: + FIXME("Unhandled type %u at index %u.\n", record->code, type->index); + return false; + } + ++sm6->type_count; + } + + assert(sm6->type_count == type_count); + + if (struct_name) + { + FIXME("Unused struct name %s.\n", struct_name); + vkd3d_free(struct_name); + } + + return true; +} + +static inline const struct vkd3d_shader_sm6_type *shader_sm6_get_type(const struct vkd3d_shader_sm6_parser *sm6, + const char *function, uint64_t type_id) +{ + if (type_id >= sm6->type_count) + { + vkd3d_dbg_printf(VKD3D_DBG_LEVEL_FIXME, function, "Invalid type index 
%"PRIu64" at %u.\n", + type_id, sm6->value_count); + return NULL; + } + return &sm6->types[type_id]; +} + +#define shader_sm6_get_type(sm6, id) shader_sm6_get_type(sm6, __FUNCTION__, id) + +static inline const struct vkd3d_shader_sm6_type *shader_sm6_get_element_type( + const struct vkd3d_shader_sm6_type *type, unsigned int elem_idx) +{ + switch (type->type) + { + case VKD3D_SM6_ARRAY: + case VKD3D_SM6_VECTOR: + if (elem_idx != ~0u && elem_idx >= type->u.array.count) + return NULL; + return type->u.array.elem_type; + + case VKD3D_SM6_POINTER: + return type->u.pointer.type; + + case VKD3D_SM6_STRUCT: + if (elem_idx >= type->u.struc->elem_count) + return NULL; + return type->u.struc->elem_types[elem_idx]; + + default: + return NULL; + } +} + +static inline uint64_t decode_rotated_signed_value(uint64_t value) +{ + if (value != 1) + { + bool neg = value & 1; + value >>= 1; + return neg ? -value : value; + } + return value << 63; +} + +static inline bool shader_sm6_type_is_1wil(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER && (type->u.width == 1 || type->u.width >= 16); +} + +static inline bool shader_sm6_type_is_i8(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER && type->u.width == 8; +} + +static inline bool shader_sm6_type_is_i16(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER && type->u.width == 16; +} + +static inline bool shader_sm6_type_is_bool(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER && type->u.width == 1; +} + +static inline bool shader_sm6_type_is_half(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_FLOAT && type->u.width == 16; +} + +static inline bool shader_sm6_type_is_float(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_FLOAT && type->u.width == 32; +} + +static inline bool shader_sm6_type_is_double(const struct vkd3d_shader_sm6_type 
*type) +{ + return type->type == VKD3D_SM6_FLOAT && type->u.width == 64; +} + +static inline bool shader_sm6_type_is_scalar(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER || type->type == VKD3D_SM6_FLOAT || type->type == VKD3D_SM6_POINTER; +} + +static inline bool shader_sm6_type_is_struct(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_STRUCT; +} + +static inline bool shader_sm6_type_is_aggregate(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_STRUCT || type->type == VKD3D_SM6_VECTOR || type->type == VKD3D_SM6_ARRAY; +} + +static inline const struct vkd3d_shader_sm6_type *shader_sm6_type_get_pointee_type( + const struct vkd3d_shader_sm6_type *type) +{ + return shader_sm6_type_is_pointer(type) ? type->u.pointer.type : NULL; +} + +static const struct vkd3d_shader_sm6_type *shader_sm6_type_get_pointer_to_type(struct vkd3d_shader_sm6_parser *sm6, + const struct vkd3d_shader_sm6_type *type, enum vkd3d_shader_address_space addr_space) +{ + const struct vkd3d_shader_sm6_type *pointer_type; + unsigned int i, start = type->index; + + /* LLVM seems usually to place the pointer type immediately after its pointee. */ + for (i = (start + 1) % sm6->type_count; i != start; i = (i + 1) % sm6->type_count) + { + pointer_type = &sm6->types[i]; + if (shader_sm6_type_is_pointer(pointer_type) && pointer_type->u.pointer.type == type + && pointer_type->u.pointer.addr_space == addr_space) + return pointer_type; + } + + return NULL; +} + +static unsigned int shader_sm6_get_value_index(const struct vkd3d_shader_sm6_parser *sm6, + const char *function, uint64_t id) +{ + uint64_t i; + + if ((i = sm6->value_count - id) >= sm6->cur_max_value) + { + vkd3d_dbg_printf(VKD3D_DBG_LEVEL_FIXME, function, "Invalid value index %"PRIu64" at %u.\n", + id, sm6->value_count); + return ~0u; + } + if (i > sm6->value_count) + { + /* Forward references are followed by a type id so would require special handling. 
*/ + vkd3d_dbg_printf(VKD3D_DBG_LEVEL_FIXME, function, "Unhandled forward reference %"PRIu64" at %u.\n", + id, sm6->value_count); + return ~0u; + } + if (i == sm6->value_count) + { + vkd3d_dbg_printf(VKD3D_DBG_LEVEL_FIXME, function, "Invalid value self-reference at %u.\n", sm6->value_count); + return ~0u; + } + + return i; +} + +#define SM6_RECORD_VALIDATE_OPERAND_MIN_COUNT(min_count) do {\ + if (record->operand_count < (min_count))\ + {\ + FIXME("Invalid operand count %u.\n", record->operand_count);\ + return;\ + } } while (false) + +#define SM6_RECORD_VALIDATE_OPERAND_MIN_COUNT_ODD(min_count) do {\ + if (record->operand_count < (min_count) || !(record->operand_count & 1))\ + {\ + FIXME("Invalid operand count %u.\n", record->operand_count);\ + return;\ + } } while (false) + +#define SM6_RECORD_VALIDATE_OPERAND_MAX_COUNT(max_count) do {\ + if (record->operand_count > (max_count))\ + WARN("Ignoring %u extra operands.\n", record->operand_count - (max_count)); } while (false) + +#define SM6_RECORD_VALIDATE_OPERAND_COUNT(min_count, max_count) do {\ + SM6_RECORD_VALIDATE_OPERAND_MIN_COUNT(min_count);\ + SM6_RECORD_VALIDATE_OPERAND_MAX_COUNT(max_count); } while (false) + +#define SM6_VALUE_VALIDATE_IS_POINTER(ptr) do {\ + if (!shader_sm6_type_is_pointer((ptr)->type))\ + {\ + FIXME("Operand is not a pointer.\n");\ + return;\ + } } while (false) + +#define SM6_VALUE_VALIDATE_IS_POINTER_TO_I32(ptr) do {\ + SM6_VALUE_VALIDATE_IS_POINTER(ptr);\ + if (!shader_sm6_type_is_i32((ptr)->type->u.pointer.type))\ + {\ + WARN("Pointee type is not i32.\n");\ + return;\ + } } while (false) + +/* DXIL doesn't use vectors except for ray tracing. Everywhere else it extracts and operates + * on each individual value, so these validations currently don't need to check for vectors. 
*/ + +#define SM6_VALUE_VALIDATE_IS_I32(v) do {\ + if (!shader_sm6_type_is_i32((v)->type))\ + {\ + WARN("Operand is not of type i32.\n");\ + return;\ + } } while (false) + +#define SM6_VALUE_VALIDATE_ARE_I32(a, b) do {\ + if (!shader_sm6_type_is_i32((a)->type) || !shader_sm6_type_is_i32((b)->type))\ + {\ + WARN("Operand is not of type i32.\n");\ + return;\ + } } while (false) + +#define SM6_VALUE_VALIDATE_IS_BOOL(v) do {\ + if (!shader_sm6_type_is_bool((v)->type))\ + {\ + WARN("Condition is not of type bool.\n");\ + return;\ + } } while (false) + +#define SM6_VALUE_VALIDATE_IS_SCALAR(v) do {\ + if (!shader_sm6_type_is_scalar((v)->type))\ + {\ + WARN("Operand is not of scalar type.\n");\ + return;\ + } } while (false) + +#define SM6_INSTRUCTION_LOAD_VALUE_REF(value, ins_idx, rec_idx) do {\ + uint64_t val_ref = record->operands[rec_idx];\ + unsigned int operand = shader_sm6_get_value_index(sm6, __FUNCTION__, val_ref);\ + if (operand == ~0u)\ + return;\ + value = shader_sm6_get_value(sm6, operand);\ + ins->u.operands[ins_idx].ins = value->ins;\ +} while (false) + +static inline struct vkd3d_shader_sm6_instruction *shader_sm6_get_current_instruction( + const struct vkd3d_shader_sm6_parser *sm6) +{ + return &sm6->global_instructions[sm6->global_instruction_count]; +} + +static inline struct vkd3d_shader_sm6_value *shader_sm6_get_current_value(const struct vkd3d_shader_sm6_parser *sm6) +{ + return &sm6->values[sm6->value_count]; +} + +static inline const struct vkd3d_shader_sm6_value *shader_sm6_get_value(const struct vkd3d_shader_sm6_parser *sm6, + unsigned int id) +{ + return &sm6->values[id]; +} + +static inline const struct vkd3d_shader_sm6_value *shader_sm6_get_value_unsafe(const struct vkd3d_shader_sm6_parser *sm6, + uint64_t id) +{ + return (id >= sm6->value_count) ? 
NULL : &sm6->values[id]; +} + +static inline const struct vkd3d_shader_sm6_instruction *shader_sm6_get_value_source_unsafe( + const struct vkd3d_shader_sm6_parser *sm6, uint64_t id) +{ + const struct vkd3d_shader_sm6_value *value = shader_sm6_get_value_unsafe(sm6, id); + return value ? value->ins : NULL; +} + +static inline float shader_sm6_bitcast_uint64_to_float(uint64_t value) +{ + union + { + uint32_t uint32_value; + float float_value; + } u; + + u.uint32_value = value; + return u.float_value; +} + +static inline double shader_sm6_bitcast_uint64_to_double(uint64_t value) +{ + union + { + uint64_t uint64_value; + double double_value; + } u; + + u.uint64_value = value; + return u.double_value; +} + +static unsigned int shader_sm6_instruction_allocate_constant_array(struct vkd3d_shader_sm6_instruction *ins, + const struct vkd3d_shader_sm6_type *type, unsigned int value_idx) +{ + const struct vkd3d_shader_sm6_type *elem_type; + unsigned int size; + + elem_type = type->u.array.elem_type; + /* Multidimensional arrays are emitted in flattened form. */ + if (elem_type->type != VKD3D_SM6_INTEGER && elem_type->type != VKD3D_SM6_FLOAT) + { + FIXME("Unhandled element type %u for data array constant id %u.\n", elem_type->type, + value_idx); + ins->handler_idx = VKD3DSM6IH_INVALID; + return 0; + } + + /* Arrays of bool are not used in DXIL. dxc will emit an array of int32 instead if necessary. 
*/ + if (!(size = elem_type->u.width / 8u)) + { + FIXME("Invalid data type width %u.\n", elem_type->u.width); + ins->handler_idx = VKD3DSM6IH_INVALID; + return 0; + } + + if (!(ins->u.var.u.aggregate_data.pvoid = vkd3d_malloc(type->u.array.count * size))) + { + ERR("Failed to allocate constant array.\n"); + return ~0u; + } + + return size; +} + +static bool shader_sm6_constants_init(struct vkd3d_shader_sm6_parser *sm6, const struct vkd3d_shader_sm6_block *block, + struct vkd3d_shader_sm6_instruction *instructions, unsigned int *instruction_count) +{ + const struct vkd3d_shader_sm6_type *type, *fwd_type; + unsigned int i, j, ins_count, size, value_idx; + const struct vkd3d_shader_sm6_record *record; + union vkd3d_shader_sm6_aggregate_data data; + struct vkd3d_shader_sm6_instruction *ins; + struct vkd3d_shader_sm6_value *dst; + uint64_t type_id, value; + + for (i = 0, type = NULL, ins_count = *instruction_count; i < block->record_count; ++i) + { + record = block->records[i]; + value_idx = sm6->value_count; + + if (record->code == CST_CODE_SETTYPE) + { + if (!record->operand_count) + { + FIXME("Missing type id for constant idx %u.\n", value_idx); + return false; + } + if ((type_id = record->operands[0]) >= sm6->type_count) + { + FIXME("Invalid type id %"PRIu64".\n", type_id); + return false; + } + type = shader_sm6_get_type(sm6, type_id); + + if (i == block->record_count - 1) + WARN("Unused SETTYPE record.\n"); + + continue; + } + + if (!type) + { + FIXME("Constant record %u has no type.\n", value_idx); + return false; + } + + ins = &instructions[ins_count]; + ins->handler_idx = VKD3DSM6IH_DCL_CONSTANT; + ins->value_index = sm6->value_count; + ins->result_type = type; + ins->u.var.is_constant = true; + + dst = shader_sm6_get_current_value(sm6); + fwd_type = dst->type; + dst->type = type; + dst->ins = ins; + + switch (record->code) + { + case CST_CODE_NULL: + memset(&ins->u.var.u, 0, sizeof(ins->u.var.u)); + break; + + case CST_CODE_INTEGER: + if 
(!record->operand_count) + { + FIXME("Missing integer value for constant idx %u.\n", value_idx); + ins->handler_idx = VKD3DSM6IH_INVALID; + break; + } + + if (!shader_sm6_type_is_integer(type)) + { + FIXME("Invalid integer constant idx %u of non-integer type %u.\n", + value_idx, type->type); + ins->handler_idx = VKD3DSM6IH_INVALID; + break; + } + + value = decode_rotated_signed_value(record->operands[0]); + if (type->u.width <= 32) + ins->u.var.u.uint_value = (value &= (((uint64_t)1 << type->u.width) - 1)); + else + ins->u.var.u.uint64_value = value; + + break; + + case CST_CODE_FLOAT: + if (!record->operand_count) + { + FIXME("Missing floating point value for constant idx %u.\n", value_idx); + ins->handler_idx = VKD3DSM6IH_INVALID; + break; + } + + if (!shader_sm6_type_is_floating_point(type)) + { + FIXME("Invalid float of non-fp type %u at constant idx %u.\n", type->type, value_idx); + ins->handler_idx = VKD3DSM6IH_INVALID; + break; + } + + if (type->u.width < 32) + ins->u.var.u.half_value = record->operands[0]; + else if (type->u.width == 32) + ins->u.var.u.float_value = shader_sm6_bitcast_uint64_to_float(record->operands[0]); + else + ins->u.var.u.double_value = shader_sm6_bitcast_uint64_to_double(record->operands[0]); + + break; + + case CST_CODE_DATA: + if (record->operand_count < type->u.array.count) + { + FIXME("Missing operands for array of size %u.\n", type->u.array.count); + ins->handler_idx = VKD3DSM6IH_INVALID; + break; + } + + if (!shader_sm6_type_is_array(type)) + { + FIXME("Invalid type %u for data constant idx %u.\n", type->type, value_idx); + ins->handler_idx = VKD3DSM6IH_INVALID; + break; + } + + if ((size = shader_sm6_instruction_allocate_constant_array(ins, type, value_idx)) == ~0u) + return false; + else if (!size) + break; + + data = ins->u.var.u.aggregate_data; + if (size == 1) + for (j = 0; j < record->operand_count; ++j) + data.p8[j] = record->operands[j]; + else if (size == 2) + for (j = 0; j < record->operand_count; ++j) + data.p16[j] = 
record->operands[j]; + else if (size == 4) + for (j = 0; j < record->operand_count; ++j) + data.p32[j] = record->operands[j]; + else + for (j = 0; j < record->operand_count; ++j) + data.p64[j] = record->operands[j]; + break; + + case CST_CODE_UNDEF: + if (record->operand_count) + WARN("Unexpected value for undefined constant idx %u.\n", value_idx); + ins->u.var.is_undefined = true; + break; + + default: + FIXME("Unhandled constant code %u.\n", record->code); + ins->u.var.is_undefined = true; + break; + } + + /* fwd_type can be non-NULL with constexpr declarations. */ + if (fwd_type && fwd_type != dst->type) + WARN("Type mismatch, %u vs forward type declaration %u.\n", dst->type->index, fwd_type->index); + + sm6->p.failed |= ins->handler_idx == VKD3DSM6IH_INVALID; + ++sm6->value_count; + ++ins_count; + } + + *instruction_count = ins_count; + return true; +} + +unsigned int shader_sm6_instruction_get_uint_value(const struct vkd3d_shader_sm6_instruction *ins) +{ + if (ins->handler_idx != VKD3DSM6IH_DCL_CONSTANT || !shader_sm6_type_is_integer(ins->result_type)) + return ~0u; + + if (ins->result_type->u.width > 32) + { + if (ins->u.var.u.uint64_value > UINT_MAX) + FIXME("Truncating 64-bit value.\n"); + return ins->u.var.u.uint64_value; + } + + return ins->u.var.u.uint_value; +} + +static enum vkd3d_sm6_parameter_attribute_flag shader_sm6_decode_parameter_attribute( + enum dxil_bc_parameter_attribute_key key) +{ + switch (key) + { + case ATTR_NODUPLICATE: + return PARAM_ATTR_NODUPLICATE; + case ATTR_NOUNWIND: + return PARAM_ATTR_NOUNWIND; + case ATTR_READNONE: + return PARAM_ATTR_READNONE; + case ATTR_READONLY: + return PARAM_ATTR_READONLY; + default: + return PARAM_ATTR_INVALID; + } +} + +static bool shader_sm6_parameter_attributes_init(struct vkd3d_shader_sm6_parser *sm6) +{ + struct vkd3d_shader_sm6_parameter_attribute_entry *entry; + struct vkd3d_shader_sm6_parameter_group *group; + const struct vkd3d_shader_sm6_record *record; + const struct vkd3d_shader_sm6_block 
*block; + unsigned int i, j, count; + bool is_unique; + + if (!(block = shader_sm6_get_level_one_block(sm6, PARAMATTR_GROUP_BLOCK, &is_unique))) + return true; + if (!is_unique) + FIXME("Ignoring extra parameter group block(s).\n"); + + if (!(sm6->attr_groups = vkd3d_malloc(block->record_count * sizeof(*sm6->attr_groups)))) + { + ERR("Failed to allocate attribute groups.\n"); + return false; + } + + for (i = 0; i < block->record_count; ++i) + { + record = block->records[i]; + if (record->code != PARAMATTR_GRP_CODE_ENTRY) + { + WARN("Skipping group code %u.\n", record->code); + continue; + } + + if (record->operand_count < 4 || (record->operand_count & 1)) + { + FIXME("Invalid operand count for group code entry %u.\n", sm6->attr_group_count); + continue; + } + + group = &sm6->attr_groups[sm6->attr_group_count++]; + group->key = record->operands[0]; + if (record->operands[1] > ~0u) + WARN("Truncating parameter index %"PRIu64".\n", record->operands[1]); + group->param_index = record->operands[1]; + group->attributes = 0; + for (j = 2; j < record->operand_count; j += 2) + { + if (record->operands[j] != ATTR_KIND_WELL_KNOWN_VOID) + { + FIXME("Unhandled attribute kind %"PRIu64".\n", record->operands[j]); + continue; + } + group->attributes |= shader_sm6_decode_parameter_attribute(record->operands[j + 1]); + } + assert(j == record->operand_count); + } + + if (!(block = shader_sm6_get_level_one_block(sm6, PARAMATTR_BLOCK, &is_unique))) + { + WARN("No parameter attribute block found.\n"); + return true; + } + if (!is_unique) + FIXME("Ignoring extra parameter attribute block(s).\n"); + + if (!(sm6->attributes = vkd3d_malloc(block->record_count * sizeof(*sm6->attributes)))) + { + ERR("Failed to allocate attributes.\n"); + return false; + } + + for (i = 0; i < block->record_count; ++i) + { + record = block->records[i]; + if (record->code != PARAMATTR_CODE_ENTRY) + { + WARN("Skipping code %u.\n", record->code); + continue; + } + + if (!record->operand_count) + { + 
FIXME("Missing parameter attribute operand(s) for entry %u.\n", sm6->attribute_count); + continue; + } + + count = record->operand_count; + if (!(entry = vkd3d_malloc(sizeof(*entry) + count * sizeof(entry->keys[0])))) + { + ERR("Failed to allocate attribute entry.\n"); + return false; + } + entry->count = count; + + for (j = 0; j < count; ++j) + entry->keys[j] = record->operands[j]; + + sm6->attributes[sm6->attribute_count++] = entry; + } + + return true; +} + +static bool shader_sm6_symtab_init(struct vkd3d_shader_sm6_parser *sm6) +{ + const struct vkd3d_shader_sm6_record *record; + const struct vkd3d_shader_sm6_block *block; + struct vkd3d_shader_sm6_symbol *symbol; + unsigned int i, count; + bool is_unique; + + if (!(block = shader_sm6_get_level_one_block(sm6, VALUE_SYMTAB_BLOCK, &is_unique))) + { + /* There should always be at least one symbol: the name of the entry point function. */ + FIXME("No value symtab block found.\n"); + return false; + } + if (!is_unique) + FIXME("Ignoring extra value symtab block(s).\n"); + + for (i = 0, count = 0; i < block->record_count; ++i) + count += block->records[i]->code == VST_CODE_ENTRY; + + if (!(sm6->global_symbols = vkd3d_malloc(count * sizeof(*sm6->global_symbols)))) + { + ERR("Failed to allocate global symbols.\n"); + return false; + } + + for (i = 0; i < block->record_count; ++i) + { + record = block->records[i]; + if (record->code != VST_CODE_ENTRY) + { + WARN("Skipping code %u.\n", record->code); + continue; + } + if (record->operand_count < 2) + { + FIXME("Missing operands for function symbol %u.\n", sm6->global_symbol_count); + continue; + } + symbol = &sm6->global_symbols[sm6->global_symbol_count]; + symbol->id = record->operands[0]; + if (!(symbol->name = shader_sm6_record_to_string(record, 1))) + { + ERR("Failed to allocate symbol name.\n"); + return false; + } + ++sm6->global_symbol_count; + } + + return true; +} + +static const char *shader_sm6_global_symbol_name(const struct vkd3d_shader_sm6_parser *sm6, 
unsigned int id) +{ + unsigned int i; + + for (i = 0; i < sm6->global_symbol_count; ++i) + { + if (sm6->global_symbols[i].id == id) + return sm6->global_symbols[i].name; + } + return NULL; +} + +static bool shader_sm6_declare_function(struct vkd3d_shader_sm6_parser *sm6, + const struct vkd3d_shader_sm6_record *record) +{ + struct vkd3d_shader_sm6_instruction *ins = shader_sm6_get_current_instruction(sm6); + struct vkd3d_shader_sm6_value *fn; + unsigned int i, j; + + if (record->operand_count < 5) + { + FIXME("Missing operands for function %u.\n", sm6->value_count); + return false; + } + + fn = shader_sm6_get_current_value(sm6); + if (!(ins->u.function.name = shader_sm6_global_symbol_name(sm6, sm6->value_count))) + { + WARN("Missing symbol name for function %u.\n", sm6->value_count); + ins->u.function.name = ""; + } + + if (!(fn->type = shader_sm6_get_type(sm6, record->operands[0]))) + return false; + ins->u.function.type = fn->type; + if (!(fn->type = shader_sm6_type_get_pointer_to_type(sm6, fn->type, VKD3D_AS_DEFAULT))) + { + FIXME("Failed to get pointer type for type %u.\n", fn->type->type); + return false; + } + ins->result_type = fn->type; + fn->ins = ins; + + ins->u.function.is_prototype = !!record->operands[2]; + + if (record->operands[4] > ~0u) + WARN("Truncating attributes id %"PRIu64".\n", record->operands[4]); + /* 1-based index */ + if ((ins->u.function.attribs_id = record->operands[4]) > sm6->attribute_count) + { + FIXME("Invalid attributes id %u for function %u.\n", ins->u.function.attribs_id, sm6->value_count); + return false; + } + + for (i = 5, j = !!record->operands[1] + !!record->operands[3]; i < record->operand_count; ++i) + j += !!record->operands[i]; + if (j) + WARN("Ignoring %u operands.\n", j); + + ins->handler_idx = VKD3DSM6IH_DCL_FUNCTION; + ins->value_index = sm6->value_count++; + ++sm6->global_instruction_count; + + return true; +} + +static bool shader_sm6_globals_init(struct vkd3d_shader_sm6_parser *sm6) +{ + const struct 
vkd3d_shader_sm6_block *block = &sm6->root_block; + const struct vkd3d_shader_sm6_record *record; + unsigned int i; + + for (i = 0; i < block->record_count; ++i) + { + record = block->records[i]; + switch (record->code) + { + case MODULE_CODE_FUNCTION: + if (!shader_sm6_declare_function(sm6, record)) + return false; + break; + case MODULE_CODE_GLOBALVAR: + FIXME("Global variables are not implemented yet.\n"); + return false; + case MODULE_CODE_VERSION: + if (!record->operand_count) + { + FIXME("Missing version operand.\n"); + } + else if (record->operands[0] != 1) + { + FIXME("Unsupported format version %"PRIu64".\n", record->operands[0]); + return false; + } + default: + break; + } + } + + for (i = 0; i < block->child_block_count; ++i) + { + if (block->child_blocks[i]->id == CONSTANTS_BLOCK && !shader_sm6_constants_init(sm6, block->child_blocks[i], + sm6->global_instructions, &sm6->global_instruction_count)) + return false; + } + + return true; +} + +/* + 1 -> int1 + 8 -> int8 + w -> int16 + i -> int32 + l -> int64 + m -> int16/32/64 + h -> half + f -> float + g -> float or double + d -> double + p -> int32* + q -> float* + H -> handle + D -> dimensions + P -> sample pos + v -> void + B -> binary with carry + 2 -> binary with 2 x int32 outputs + S -> split double + V -> 4 x int32 + o -> overloaded + */ +static const struct vkd3d_sm6_call_opcode_info sm6_call_op_table[] = +{ + [CALL_LOAD_INPUT ] = {VKD3D_CALL_OP_LOAD_INPUT, 'o', "ii8i"}, + [CALL_STORE_OUTPUT ] = {VKD3D_CALL_OP_STORE_OUTPUT, 'v', "ii8o"}, + [CALL_FABS ] = {VKD3D_CALL_OP_FABS, 'g', "g"}, + [CALL_SATURATE ] = {VKD3D_CALL_OP_SATURATE, 'g', "g"}, + [CALL_ISNAN ] = {VKD3D_CALL_OP_ISNAN, '1', "g"}, + [CALL_ISINF ] = {VKD3D_CALL_OP_ISINF, '1', "g"}, + [CALL_ISFINITE ] = {VKD3D_CALL_OP_ISFINITE, '1', "g"}, + [CALL_ISNORMAL ] = {VKD3D_CALL_OP_ISNORMAL, '1', "g"}, + [CALL_COS ] = {VKD3D_CALL_OP_COS, 'g', "g"}, + [CALL_SIN ] = {VKD3D_CALL_OP_SIN, 'g', "g"}, + [CALL_TAN ] = {VKD3D_CALL_OP_TAN, 'g', "g"}, + 
[CALL_ACOS ] = {VKD3D_CALL_OP_ACOS, 'g', "g"}, + [CALL_ASIN ] = {VKD3D_CALL_OP_ASIN, 'g', "g"}, + [CALL_ATAN ] = {VKD3D_CALL_OP_ATAN, 'g', "g"}, + [CALL_EXP ] = {VKD3D_CALL_OP_EXP, 'g', "g"}, + [CALL_FRC ] = {VKD3D_CALL_OP_FRC, 'g', "g"}, + [CALL_LOG ] = {VKD3D_CALL_OP_LOG, 'g', "g"}, + [CALL_SQRT ] = {VKD3D_CALL_OP_SQRT, 'g', "g"}, + [CALL_RSQRT ] = {VKD3D_CALL_OP_RSQRT, 'g', "g"}, + [CALL_ROUND_NE ] = {VKD3D_CALL_OP_ROUND_NE, 'g', "g"}, + [CALL_ROUND_NI ] = {VKD3D_CALL_OP_ROUND_NI, 'g', "g"}, + [CALL_ROUND_PI ] = {VKD3D_CALL_OP_ROUND_PI, 'g', "g"}, + [CALL_ROUND_Z ] = {VKD3D_CALL_OP_ROUND_Z, 'g', "g"}, + [CALL_FIRST_BIT_LO ] = {VKD3D_CALL_OP_FIRST_BIT_LO, 'i', "m"}, + [CALL_FIRST_BIT_HI ] = {VKD3D_CALL_OP_FIRST_BIT_HI, 'i', "m"}, + [CALL_FIRST_BIT_SHI ] = {VKD3D_CALL_OP_FIRST_BIT_SHI, 'i', "m"}, + [CALL_FMAX ] = {VKD3D_CALL_OP_FMAX, 'g', "gg"}, + [CALL_FMIN ] = {VKD3D_CALL_OP_FMIN, 'g', "gg"}, + [CALL_IMAX ] = {VKD3D_CALL_OP_IMAX, 'm', "mm"}, + [CALL_IMIN ] = {VKD3D_CALL_OP_IMIN, 'm', "mm"}, + [CALL_UMAX ] = {VKD3D_CALL_OP_UMAX, 'm', "mm"}, + [CALL_UMIN ] = {VKD3D_CALL_OP_UMIN, 'm', "mm"}, + [CALL_FMAD ] = {VKD3D_CALL_OP_FMAD, 'g', "ggg"}, + [CALL_FMA ] = {VKD3D_CALL_OP_FMA, 'g', "ggg"}, + [CALL_DOT2 ] = {VKD3D_CALL_OP_DOT2, 'o', "oooo"}, + [CALL_DOT3 ] = {VKD3D_CALL_OP_DOT3, 'o', "oooooo"}, + [CALL_DOT4 ] = {VKD3D_CALL_OP_DOT4, 'o', "oooooooo"}, + [CALL_CREATE_HANDLE ] = {VKD3D_CALL_OP_CREATE_HANDLE, 'H', "8ii1"}, + [CALL_CBUFFER_LOAD ] = {VKD3D_CALL_OP_CBUFFER_LOAD, 'o', "Hii"}, + [CALL_CBUFFER_LOAD_LEGACY ] = {VKD3D_CALL_OP_CBUFFER_LOAD_LEGACY, 'o', "Hi"}, + [CALL_SAMPLE ] = {VKD3D_CALL_OP_SAMPLE, 'o', "HHffffiiif"}, + [CALL_SAMPLE_B ] = {VKD3D_CALL_OP_SAMPLE_B, 'o', "HHffffiiiff"}, + [CALL_SAMPLE_LOD ] = {VKD3D_CALL_OP_SAMPLE_LOD, 'o', "HHffffiiif"}, + [CALL_SAMPLE_GRAD ] = {VKD3D_CALL_OP_SAMPLE_GRAD, 'o', "HHffffiiifffffff"}, + [CALL_SAMPLE_C ] = {VKD3D_CALL_OP_SAMPLE_C, 'o', "HHffffiiiff"}, + [CALL_SAMPLE_C_LZ ] = {VKD3D_CALL_OP_SAMPLE_C_LZ, 'o', 
"HHffffiiif"}, + [CALL_TEXTURE_LOAD ] = {VKD3D_CALL_OP_TEXTURE_LOAD, 'o', "Hiiiiiii"}, + [CALL_TEXTURE_STORE ] = {VKD3D_CALL_OP_TEXTURE_STORE, 'v', "Hiiioooo8"}, + [CALL_BUFFER_LOAD ] = {VKD3D_CALL_OP_BUFFER_LOAD, 'o', "Hii"}, + [CALL_BUFFER_STORE ] = {VKD3D_CALL_OP_BUFFER_STORE, 'v', "Hiioooo8"}, + [CALL_GET_DIMENSIONS ] = {VKD3D_CALL_OP_GET_DIMENSIONS, 'D', "Hi"}, + [CALL_TEXTURE_GATHER ] = {VKD3D_CALL_OP_TEXTURE_GATHER, 'o', "HHffffiii"}, + [CALL_TEXTURE_GATHER_CMP ] = {VKD3D_CALL_OP_TEXTURE_GATHER_CMP, 'o', "HHffffiiif"}, + [CALL_ATOMIC_BINOP ] = {VKD3D_CALL_OP_ATOMIC_BINOP, 'o', "Hiiiio"}, + [CALL_ATOMIC_CMP_XCHG ] = {VKD3D_CALL_OP_ATOMIC_CMP_XCHG, 'o', "Hiiioo"}, + [CALL_BARRIER ] = {VKD3D_CALL_OP_BARRIER, 'v', "i"}, + [CALL_DISCARD ] = {VKD3D_CALL_OP_DISCARD, 'v', "1"}, + [CALL_DERIV_COARSEX ] = {VKD3D_CALL_OP_DERIV_COARSEX, 'o', "o"}, + [CALL_DERIV_COARSEY ] = {VKD3D_CALL_OP_DERIV_COARSEY, 'o', "o"}, + [CALL_THREAD_ID ] = {VKD3D_CALL_OP_THREAD_ID, 'i', "i"}, + [CALL_GROUP_ID ] = {VKD3D_CALL_OP_GROUP_ID, 'i', "i"}, + [CALL_THREAD_ID_IN_GROUP ] = {VKD3D_CALL_OP_THREAD_ID_IN_GROUP, 'i', "i"}, + [CALL_FLATTENED_THREAD_ID_IN_GROUP ] = {VKD3D_CALL_OP_FLATTENED_THREAD_ID_IN_GROUP, 'i', ""}, + [CALL_EMIT_STREAM ] = {VKD3D_CALL_OP_EMIT_STREAM, 'v', "8"}, + [CALL_MAKE_DOUBLE ] = {VKD3D_CALL_OP_MAKE_DOUBLE, 'd', "ii"}, + [CALL_SPLIT_DOUBLE ] = {VKD3D_CALL_OP_SPLIT_DOUBLE, 'S', "d"}, + [CALL_LOAD_OUTPUT_CONTROL_POINT ] = {VKD3D_CALL_OP_LOAD_OUTPUT_CONTROL_POINT, 'o', "ii8i"}, + [CALL_LOAD_PATCH_CONSTANT ] = {VKD3D_CALL_OP_LOAD_PATCH_CONSTANT, 'o', "ii8"}, + [CALL_DOMAIN_LOCATION ] = {VKD3D_CALL_OP_DOMAIN_LOCATION, 'f', "8"}, + [CALL_STORE_PATCH_CONSTANT ] = {VKD3D_CALL_OP_STORE_PATCH_CONSTANT, 'v', "ii8o"}, + [CALL_OUTPUT_CONTROL_POINT_ID ] = {VKD3D_CALL_OP_OUTPUT_CONTROL_POINT_ID, 'i', ""}, + [CALL_PRIMITIVE_ID ] = {VKD3D_CALL_OP_PRIMITIVE_ID, 'i', ""}, + [CALL_WAVE_IS_FIRST_LANE ] = {VKD3D_CALL_OP_WAVE_IS_FIRST_LANE, '1', ""}, + [CALL_WAVE_GET_LANE_COUNT ] = 
{VKD3D_CALL_OP_WAVE_GET_LANE_COUNT, 'i', ""}, + [CALL_WAVE_GET_LANE_INDEX ] = {VKD3D_CALL_OP_WAVE_GET_LANE_INDEX, 'i', ""}, + [CALL_WAVE_READ_LANE_AT ] = {VKD3D_CALL_OP_WAVE_READ_LANE_AT, 'o', "oi"}, + [CALL_WAVE_READ_LANE_FIRST ] = {VKD3D_CALL_OP_WAVE_READ_LANE_FIRST, 'o', "o"}, + [CALL_WAVE_PREFIX_OP ] = {VKD3D_CALL_OP_WAVE_PREFIX_OP, 'o', "o88"}, + [CALL_LEGACY_F32TOF16 ] = {VKD3D_CALL_OP_LEGACY_F32TOF16, 'i', "f"}, + [CALL_LEGACY_F16TOF32 ] = {VKD3D_CALL_OP_LEGACY_F16TOF32, 'f', "i"}, + [CALL_WAVE_ALL_BIT_COUNT ] = {VKD3D_CALL_OP_WAVE_ALL_BIT_COUNT, 'i', "1"}, + [CALL_WAVE_PREFIX_BIT_COUNT ] = {VKD3D_CALL_OP_WAVE_PREFIX_BIT_COUNT, 'i', "1"}, + [CALL_RAW_BUFFER_LOAD ] = {VKD3D_CALL_OP_RAW_BUFFER_LOAD, 'o', "Hii8i"}, + [CALL_RAW_BUFFER_STORE ] = {VKD3D_CALL_OP_RAW_BUFFER_STORE, 'v', "Hiioooo8i"}, +}; + +static bool shader_sm6_validate_operand_type(const struct vkd3d_shader_sm6_type *type, char info_type) +{ + switch (info_type) + { + case 0: + FIXME("Invalid operand count.\n"); + return false; + case '1': + return shader_sm6_type_is_bool(type); + case '8': + return shader_sm6_type_is_i8(type); + case 'w': + return shader_sm6_type_is_i16(type); + case 'p': + if (!shader_sm6_type_is_pointer(type)) + return false; + type = type->u.pointer.type; + /* fall through */ + case 'i': + return shader_sm6_type_is_i32(type); + case 'l': + return shader_sm6_type_is_i64(type); + case 'm': + return shader_sm6_type_is_1wil(type); + case 'h': + return shader_sm6_type_is_half(type); + case 'q': + if (!shader_sm6_type_is_pointer(type)) + return false; + type = type->u.pointer.type; + /* fall through */ + case 'f': + return shader_sm6_type_is_float(type); + case 'd': + return shader_sm6_type_is_double(type); + case 'g': + return shader_sm6_type_is_floating_point(type); + case 'H': + return shader_sm6_type_is_struct(type) && type->u.struc->name && !strcmp(type->u.struc->name, "dx.types.Handle"); + case 'D': + return shader_sm6_type_is_struct(type) && type->u.struc->name && 
!strcmp(type->u.struc->name, "dx.types.Dimensions"); + case 'P': + return shader_sm6_type_is_struct(type) && type->u.struc->name && !strcmp(type->u.struc->name, "dx.types.SamplePos"); + case 'v': + return type->type == VKD3D_SM6_VOID; + case 'o': + /* TODO: some type checking may be possible */ + return true; + default: + FIXME("Unhandled operand code '%c'.\n", info_type); + return false; + } +} + +static void shader_sm6_validate_call_op(enum dxil_call_opcode code, struct vkd3d_shader_sm6_instruction *ins) +{ + const struct vkd3d_sm6_call_opcode_info *info; + unsigned int i; + + info = &sm6_call_op_table[code]; + + for (i = 2; i < ins->operand_count; ++i) + { + const struct vkd3d_shader_sm6_instruction *op = ins->u.operands[i].ins; + if (!shader_sm6_validate_operand_type(op->result_type, info->operand_info[i - 2])) + WARN("Failed to validate call operand %u for code %u.\n", i - 1, code); + } +} + +static enum vkd3d_shader_sm6_call_op shader_sm6_decode_call_op(enum dxil_call_opcode code, + struct vkd3d_shader_sm6_instruction *ins) +{ + if (code >= ARRAY_SIZE(sm6_call_op_table) || !sm6_call_op_table[code].operand_info) + { + FIXME("Unhandled dx.op function call code %u.\n", code); + return VKD3D_CALL_OP_INVALID; + } + shader_sm6_validate_call_op(code, ins); + return sm6_call_op_table[code].op; +} + +/* 0 -> function value + * 1+ -> operands + * r -> return type + * modifier -> 1-based parameter attributes id + */ +static void shader_sm6_instruction_call(struct vkd3d_shader_sm6_parser *sm6, + const struct vkd3d_shader_sm6_record *record, struct vkd3d_shader_sm6_instruction *ins) +{ + const struct vkd3d_shader_sm6_instruction *fn_ins, *op_ins = NULL; + const struct vkd3d_shader_sm6_type *type = NULL; + const struct vkd3d_shader_sm6_value *fn_value; + struct vkd3d_shader_sm6_value *ret_value; + uint64_t attribs_id, call_conv; + unsigned int i = 0, j, operand; + + SM6_RECORD_VALIDATE_OPERAND_MIN_COUNT(3); + + /* 1-based index */ + if ((attribs_id = record->operands[i++]) 
> sm6->attribute_count) + { + WARN("Invalid parameter attributes id %"PRIu64".\n", attribs_id); + attribs_id = 0; + } + ins->modifier = attribs_id; + + if ((call_conv = record->operands[i++]) & (1u << 15)) + { + type = shader_sm6_get_type(sm6, record->operands[i++]); + SM6_RECORD_VALIDATE_OPERAND_MIN_COUNT(4); + } + + SM6_INSTRUCTION_LOAD_VALUE_REF(fn_value, 0, i++); + fn_ins = fn_value->ins; + if (!shader_sm6_instruction_is_function_dcl(fn_ins)) + { + FIXME("Target function value is not a function declaration.\n"); + return; + } + + if (!type) + { + type = fn_value->type->u.pointer.type; + } + else if (type != fn_value->type->u.pointer.type) + { + WARN("Explicit call type %u does not match function type %u.\n", type->index, + fn_value->type->u.pointer.type->index); + type = fn_value->type->u.pointer.type; + } + + ins->result_type = type->u.function->ret_type; + if (ins->result_type->type != VKD3D_SM6_VOID) + { + ret_value = shader_sm6_get_current_value(sm6); + ret_value->type = ins->result_type; + } + + SM6_RECORD_VALIDATE_OPERAND_MIN_COUNT(i + type->u.function->param_count); + + for (j = 0; j < type->u.function->param_count; ++j) + { + if ((operand = shader_sm6_get_value_index(sm6, __FUNCTION__, record->operands[i++])) == ~0u) + return; + ins->u.operands[j + 1].ins = shader_sm6_get_value(sm6, operand)->ins; + } + ins->operand_count = j + 1; + assert(ins->operand_count <= ARRAY_SIZE(ins->u.operands)); + + if (fn_ins->u.function.is_prototype) + { + /* Instructions not supported in LLVM are emitted as calls to functions named 'dx.op.*' */ + op_ins = ins->u.operands[1].ins; + if (!shader_sm6_instruction_is_constant(op_ins) || !shader_sm6_type_is_integer(op_ins->result_type)) + { + FIXME("dx.op id is not a constant int; handler %u.\n", op_ins->handler_idx); + return; + } + + ins->sub_opcode = shader_sm6_decode_call_op(shader_sm6_instruction_get_uint_value(op_ins), ins); + } + + ins->handler_idx = VKD3DSM6IH_CALL; +} + +/* 0 -> value */ +static void 
shader_sm6_instruction_ret(struct vkd3d_shader_sm6_parser *sm6, + const struct vkd3d_shader_sm6_record *record, struct vkd3d_shader_sm6_instruction *ins) +{ + const struct vkd3d_shader_sm6_value *value; + + SM6_RECORD_VALIDATE_OPERAND_COUNT(0, 1); + + if ((ins->operand_count = !!record->operand_count)) + { + SM6_INSTRUCTION_LOAD_VALUE_REF(value, 0, 0); + (void)value; + } + + ins->handler_idx = VKD3DSM6IH_RET; +} + +static inline const struct vkd3d_shader_sm6_instruction *shader_sm6_next_function_definition( + struct vkd3d_shader_sm6_parser *sm6) +{ + unsigned count = sm6->function_def_count; + unsigned int i; + + for (i = 0; i < sm6->global_instruction_count; ++i) + { + if (sm6->global_instructions[i].handler_idx == VKD3DSM6IH_DCL_FUNCTION + && !sm6->global_instructions[i].u.function.is_prototype && !count--) + break; + } + if (i == sm6->global_instruction_count) + return NULL; + + ++sm6->function_def_count; + return &sm6->global_instructions[i]; +} + +static bool shader_sm6_function_init(struct vkd3d_shader_sm6_parser *sm6, const struct vkd3d_shader_sm6_block *block, + struct vkd3d_shader_sm6_function *function) +{ + const struct vkd3d_shader_sm6_record *record; + struct vkd3d_shader_sm6_instruction *ins; + struct vkd3d_shader_sm6_value *dst; + bool has_result; + unsigned int i; + + if (!(function->declaration = shader_sm6_next_function_definition(sm6))) + { + FIXME("No definition for function body.\n"); + return false; + } + if (!function->instruction_count) + function->base_value_index = sm6->value_count; + sm6->cur_max_value = function->value_count; + + for (i = 0; i < block->record_count; ++i) + { + ins = &function->instructions[function->instruction_count]; + ins->handler_idx = VKD3DSM6IH_INVALID; + ins->value_index = ~0u; + + dst = shader_sm6_get_current_value(sm6); + assert(!dst->type); + dst->ins = ins; + has_result = true; + + record = block->records[i]; + switch (record->code) + { + case FUNC_CODE_DECLAREBLOCKS: + if (!record->operand_count || 
record->operands[0] > ~0u) + { + FIXME("Invalid DECLAREBLOCKS record.\n"); + return false; + } + function->block_count = record->operands[0]; + continue; + case FUNC_CODE_INST_CALL: + shader_sm6_instruction_call(sm6, record, ins); + if (!ins->result_type) + return false; + has_result = !shader_sm6_type_is_void_safe(ins->result_type); + break; + case FUNC_CODE_INST_RET: + shader_sm6_instruction_ret(sm6, record, ins); + has_result = false; + break; + default: + FIXME("Unhandled dxil instruction %u.\n", record->code); + return false; + } + + if (ins->handler_idx == VKD3DSM6IH_INVALID) + { + sm6->p.failed = true; + /* If a result type is needed, parsing can continue if we have it, otherwise + * the value references will be corrupted. */ + if (has_result && !dst->type) + return false; + } + ++function->instruction_count; + if (!shader_sm6_type_is_void_safe(dst->type)) + ins->value_index = sm6->value_count++; + } + + function->value_count = sm6->value_count - function->base_value_index; + + return true; +} + +static bool shader_sm6_module_init(struct vkd3d_shader_sm6_parser *sm6, const struct vkd3d_shader_sm6_block *block, + unsigned int depth) +{ + unsigned int i, value_count = sm6->value_count; + struct vkd3d_shader_sm6_function *function; + + for (i = 0; i < block->child_block_count; ++i) + { + if (!shader_sm6_module_init(sm6, block->child_blocks[i], depth + 1)) + return false; + } + + switch (block->id) + { + case CONSTANTS_BLOCK: + if (depth < 2) + break; + function = &sm6->functions[sm6->function_def_count]; + if (!function->instruction_count) + function->base_value_index = sm6->value_count; + sm6->cur_max_value = function->value_count; + return shader_sm6_constants_init(sm6, block, function->instructions, &function->instruction_count); + + case FUNCTION_BLOCK: + function = &sm6->functions[sm6->function_def_count]; + if (!shader_sm6_function_init(sm6, block, function)) + return false; + /* The value index returns to its previous value after handling a function. 
It's usually nonzero + * at the start because of global constants/variables/function declarations. Function constants + * occur in a child block, so value_count is already saved before they are emitted. */ + memset(&sm6->values[value_count], 0, (sm6->value_count - value_count) * sizeof(*sm6->values)); + sm6->value_count = value_count; + break; + + default: + break; + } + + return true; +} + +static void shader_sm6_global_abbrevs_cleanup(struct vkd3d_shader_sm6_global_abbrev **abbrevs, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + vkd3d_free(abbrevs[i]); + vkd3d_free(abbrevs); +} + +static unsigned int shader_sm6_functions_count(const struct vkd3d_shader_sm6_block *root) +{ + unsigned int i, count = 0; + + for (i = 0; i < root->child_block_count; ++i) + count += root->child_blocks[i]->id == FUNCTION_BLOCK; + + return count; +} + +static void shader_sm6_compute_max_value_count(struct vkd3d_shader_sm6_parser *sm6, + const struct vkd3d_shader_sm6_block *block, unsigned int depth) +{ + unsigned int i, count = 0, value_count, old_value_count = sm6->value_count; + + if (block->id == MODULE_BLOCK) + { + count = shader_sm6_block_module_decl_count(block); + sm6->global_instruction_count += count; + sm6->value_count += count; + } + + for (i = 0; i < block->child_block_count; ++i) + shader_sm6_compute_max_value_count(sm6, block->child_blocks[i], depth + 1); + + switch (block->id) + { + case CONSTANTS_BLOCK: + count = shader_sm6_block_constants_count(block); + if (depth > 1) + sm6->functions[sm6->function_def_count].instruction_count += count; + else + sm6->global_instruction_count += count; + sm6->value_count += count; + break; + case FUNCTION_BLOCK: + count = block->record_count; + value_count = sm6->value_count + count; + sm6->value_capacity = max(sm6->value_capacity, value_count); + sm6->functions[sm6->function_def_count].value_count = value_count; + sm6->functions[sm6->function_def_count++].instruction_count += count; + sm6->value_count = 
old_value_count; + break; + default: + break; + } +} + +static void shader_sm6_instructions_cleanup(struct vkd3d_shader_sm6_instruction *instructions, unsigned int count) +{ + struct vkd3d_shader_sm6_instruction *ins; + unsigned int i; + + for (i = 0; i < count; ++i) + { + ins = &instructions[i]; + if (ins->handler_idx == VKD3DSM6IH_DCL_CONSTANT && ins->result_type->type == VKD3D_SM6_ARRAY) + vkd3d_free(ins->u.var.u.aggregate_data.pvoid); + if (ins->operand_count > ARRAY_SIZE(ins->u.operands)) + vkd3d_free(ins->u.extended_operands); + } + vkd3d_free(instructions); +} + +static void shader_sm6_block_destroy(struct vkd3d_shader_sm6_block *block) +{ + unsigned int i; + + for (i = 0; i < block->record_count; ++i) + vkd3d_free(block->records[i]); + vkd3d_free(block->records); + + for (i = 0; i < block->child_block_count; ++i) + { + shader_sm6_block_destroy(block->child_blocks[i]); + vkd3d_free(block->child_blocks[i]); + } + vkd3d_free(block->child_blocks); +} + +static void shader_sm6_parameter_attributes_cleanup(struct vkd3d_shader_sm6_parameter_attribute_entry **attributes, + unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + vkd3d_free(attributes[i]); + vkd3d_free(attributes); +} + +static void shader_sm6_type_table_cleanup(struct vkd3d_shader_sm6_type *types, unsigned int count) +{ + unsigned int i; + + if (!types) + return; + + for (i = 0; i < count; ++i) + { + switch (types[i].type) + { + case VKD3D_SM6_STRUCT: + vkd3d_free((void *)types[i].u.struc->name); + vkd3d_free(types[i].u.struc); + break; + case VKD3D_SM6_FUNCTION: + vkd3d_free(types[i].u.function); + break; + default: + break; + } + } + + vkd3d_free(types); +} + +static void shader_sm6_symtab_cleanup(struct vkd3d_shader_sm6_symbol *symbols, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + vkd3d_free((void *)symbols[i].name); + vkd3d_free(symbols); +} + +static void shader_sm6_destroy(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm6_parser 
*sm6 = vkd3d_shader_sm6_parser(parser); + unsigned int i; + + vkd3d_free(sm6->values); + shader_sm6_instructions_cleanup(sm6->global_instructions, sm6->global_instruction_count); + for (i = 0; i < sm6->function_def_count; ++i) + shader_sm6_instructions_cleanup((void *)sm6->functions[i].instructions, sm6->functions[i].instruction_count); + shader_sm6_block_destroy(&sm6->root_block); + vkd3d_free(sm6->attr_groups); + shader_sm6_parameter_attributes_cleanup(sm6->attributes, sm6->attribute_count); + shader_sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); + shader_sm6_type_table_cleanup(sm6->types, sm6->type_count); + free_shader_desc(&parser->shader_desc); + vkd3d_free(sm6); +} + +static void shader_sm6_read_module(const struct vkd3d_shader_parser *parser, struct vkd3d_shader_sm6_module *module) +{ + const struct vkd3d_shader_sm6_parser *sm6 = CONTAINING_RECORD(parser, const struct vkd3d_shader_sm6_parser, p); + + module->types = sm6->types; + module->type_count = sm6->type_count; + module->max_value_count = sm6->value_capacity; + module->global_instructions = sm6->global_instructions; + module->global_instruction_count = sm6->global_instruction_count; + module->functions = sm6->functions; + module->function_count = sm6->function_def_count; +} + +static const struct vkd3d_shader_parser_ops shader_sm6_parser_ops = +{ + .parser_reset = shader_sm6_reset, + .parser_destroy = shader_sm6_destroy, + .parser_read_module = shader_sm6_read_module, +}; + +static bool shader_sm6_init(struct vkd3d_shader_sm6_parser *sm6, const uint32_t *byte_code, + size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, + struct vkd3d_shader_message_context *message_context) +{ + uint32_t version_token, dxil_version, token_count; + unsigned int i, word_count, function_count; + struct vkd3d_shader_sm6_block *block; + enum dxil_bc_block_abbreviation abbr; + struct vkd3d_shader_version version; + + word_count = byte_code_size / 
sizeof(*byte_code); + if (word_count < 2) + { + FIXME("Invalid byte code size %zu.\n", byte_code_size); + return false; + } + + version_token = byte_code[0]; + TRACE("Compiler version: 0x%08x.\n", version_token); + token_count = byte_code[1]; + TRACE("Token count: %u.\n", token_count); + + if (token_count < 6 || word_count < token_count) + { + FIXME("Invalid token count %u.\n", token_count); + return false; + } + + if (byte_code[2] != TAG_DXIL) + WARN("Unrecognised magic number 0x%08x.\n", byte_code[2]); + + dxil_version = byte_code[3]; + TRACE("DXIL version: 0x%08x.\n", dxil_version); + + if (byte_code[4] < 16) + { + FIXME("Invalid bitcode chunk offset %u.\n", byte_code[4]); + return false; + } + sm6->start = (const uint32_t *)((const char*)&byte_code[2] + byte_code[4]); + if (sm6->start[0] != BITCODE_MAGIC) + WARN("Unrecognised magic number 0x%08x.\n", sm6->start[0]); + + sm6->end = &sm6->start[(byte_code[5] + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; + + if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) + FIXME("Unrecognised shader type %#x.\n", version.type); + + version.major = VKD3D_SM6_VERSION_MAJOR(version_token); + version.minor = VKD3D_SM6_VERSION_MINOR(version_token); + + if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) + { + FIXME("The first block abbreviation must be ENTER_SUBBLOCK, but is %u.\n", abbr); + return false; + } + + vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &shader_sm6_parser_ops); + sm6->p.ptr = &sm6->start[1]; + sm6->p.bitpos = 2; + + block = &sm6->root_block; + if (!shader_sm6_block_init(block, NULL, &sm6->p)) + return false; + shader_sm6_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); + + if (block->start + block->length != sm6->p.ptr - sm6->start) + { + FIXME("Invalid block end position %zu, expected %u.\n", sm6->p.ptr - sm6->start, + block->start + block->length); + } + if (sm6->p.ptr != sm6->end) + { + FIXME("Invalid module end position %zu, expected %zu.\n", 
sm6->p.ptr - sm6->start, + sm6->end - sm6->start); + } + + if (!shader_sm6_type_table_init(sm6)) + { + FIXME("Failed to load types.\n"); + return false; + } + + if (!shader_sm6_parameter_attributes_init(sm6)) + { + FIXME("Failed to load parameter attributes.\n"); + return false; + } + + if (!shader_sm6_symtab_init(sm6)) + { + FIXME("Failed to load value symtab.\n"); + return false; + } + + function_count = shader_sm6_functions_count(&sm6->root_block); + if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) + { + ERR("Failed to allocate function array.\n"); + return false; + } + shader_sm6_compute_max_value_count(sm6, &sm6->root_block, 0); + if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values))) + || !(sm6->global_instructions = vkd3d_calloc(sm6->global_instruction_count, sizeof(*sm6->global_instructions)))) + { + ERR("Failed to allocate value array.\n"); + return false; + } + for (i = 0; i < sm6->function_def_count; ++i) + { + if (!(sm6->functions[i].instructions = vkd3d_calloc(sm6->functions[i].instruction_count, + sizeof(*sm6->functions[i].instructions)))) + { + ERR("Failed to allocate instruction array.\n"); + return false; + } + sm6->functions[i].instruction_count = 0; + } + sm6->function_def_count = 0; + sm6->global_instruction_count = 0; + sm6->value_count = 0; + + if (!shader_sm6_globals_init(sm6)) + { + FIXME("Failed to load global declarations.\n"); + return false; + } + + if (!shader_sm6_module_init(sm6, &sm6->root_block, 0)) + { + FIXME("Failed to init module.\n"); + return false; + } + + shader_sm6_block_destroy(&sm6->root_block); + sm6->root_block.records = NULL; + sm6->root_block.record_count = 0; + sm6->root_block.child_blocks = NULL; + sm6->root_block.child_block_count = 0; + + return true; +} + +int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_desc *shader_desc, + struct 
vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_sm6_parser *sm6; + + if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6) + align(shader_desc->byte_code_size, sizeof(uint32_t))))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + sm6->p.shader_desc = *shader_desc; + shader_desc = &sm6->p.shader_desc; + /* LLVM bitcode should be 32-bit aligned, but this is not done in the DXBC container. + * Get an aligned copy to prevent unaligned access. */ + memcpy(sm6->byte_code, shader_desc->byte_code, shader_desc->byte_code_size); + shader_desc->byte_code = sm6->byte_code; + + if (!shader_sm6_init(sm6, shader_desc->byte_code, shader_desc->byte_code_size, + compile_info->source_name, &shader_desc->output_signature, message_context)) + { + WARN("Failed to initialise shader parser.\n"); + vkd3d_free(sm6); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + *parser = &sm6->p; + + return VKD3D_OK; +} diff --git a/libs/vkd3d-shader/sm6.h b/libs/vkd3d-shader/sm6.h new file mode 100644 index 00000000..db1f9768 --- /dev/null +++ b/libs/vkd3d-shader/sm6.h @@ -0,0 +1,682 @@ +/* + * Copyright 2022 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SM6_H +#define __VKD3D_SM6_H + +#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define DXIL_MAX_ALIGNMENT_EXPONENT 29 + +enum dxil_bc_block_id +{ + BLOCKINFO_BLOCK = 0, + MODULE_BLOCK = 8, + PARAMATTR_BLOCK = 9, + PARAMATTR_GROUP_BLOCK = 10, + CONSTANTS_BLOCK = 11, + FUNCTION_BLOCK = 12, + VALUE_SYMTAB_BLOCK = 14, + METADATA_BLOCK = 15, + METADATA_ATTACHMENT_BLOCK = 16, + TYPE_BLOCK = 17, + USELIST_BLOCK = 18, +}; + +enum dxil_bc_blockinfo_code +{ + SETBID = 1, + BLOCKNAME = 2, + SETRECORDNAME = 3, +}; + +enum dxil_bc_module_code +{ + MODULE_CODE_VERSION = 1, + MODULE_CODE_TRIPLE = 2, + MODULE_CODE_DATALAYOUT = 3, + MODULE_CODE_ASM = 4, + MODULE_CODE_SECTIONNAME = 5, + MODULE_CODE_DEPLIB = 6, + MODULE_CODE_GLOBALVAR = 7, + MODULE_CODE_FUNCTION = 8, + MODULE_CODE_ALIAS = 9, + MODULE_CODE_PURGEVALS = 10, + MODULE_CODE_GCNAME = 11, + MODULE_CODE_COMDAT = 12, +}; + +enum dxil_bc_attribute_code +{ + PARAMATTR_CODE_ENTRY_OLD = 1, + PARAMATTR_CODE_ENTRY = 2, + PARAMATTR_GRP_CODE_ENTRY = 3, +}; + +enum dxil_bc_constant_code +{ + CST_CODE_SETTYPE = 1, + CST_CODE_NULL = 2, + CST_CODE_UNDEF = 3, + CST_CODE_INTEGER = 4, + CST_CODE_WIDE_INTEGER = 5, + CST_CODE_FLOAT = 6, + CST_CODE_AGGREGATE = 7, + CST_CODE_STRING = 8, + CST_CODE_CSTRING = 9, + CST_CODE_CE_BINOP = 10, + CST_CODE_CE_CAST = 11, + CST_CODE_CE_GEP = 12, + CST_CODE_CE_SELECT = 13, + CST_CODE_CE_EXTRACTELT = 14, + CST_CODE_CE_INSERTELT = 15, + CST_CODE_CE_SHUFFLEVEC = 16, + CST_CODE_CE_CMP = 17, + CST_CODE_INLINEASM_OLD = 18, + CST_CODE_CE_SHUFVEC_EX = 19, + CST_CODE_CE_INBOUNDS_GEP = 20, + CST_CODE_BLOCKADDRESS = 21, + CST_CODE_DATA = 22, + CST_CODE_INLINEASM = 23, +}; + +enum 
dxil_bc_function_code +{ + FUNC_CODE_DECLAREBLOCKS = 1, + FUNC_CODE_INST_BINOP = 2, + FUNC_CODE_INST_CAST = 3, + FUNC_CODE_INST_GEP_OLD = 4, + FUNC_CODE_INST_SELECT = 5, + FUNC_CODE_INST_EXTRACTELT = 6, + FUNC_CODE_INST_INSERTELT = 7, + FUNC_CODE_INST_SHUFFLEVEC = 8, + FUNC_CODE_INST_CMP = 9, + FUNC_CODE_INST_RET = 10, + FUNC_CODE_INST_BR = 11, + FUNC_CODE_INST_SWITCH = 12, + FUNC_CODE_INST_INVOKE = 13, + FUNC_CODE_INST_UNREACHABLE = 15, + FUNC_CODE_INST_PHI = 16, + FUNC_CODE_INST_ALLOCA = 19, + FUNC_CODE_INST_LOAD = 20, + FUNC_CODE_INST_VAARG = 23, + FUNC_CODE_INST_STORE_OLD = 24, + FUNC_CODE_INST_EXTRACTVAL = 26, + FUNC_CODE_INST_INSERTVAL = 27, + FUNC_CODE_INST_CMP2 = 28, + FUNC_CODE_INST_VSELECT = 29, + FUNC_CODE_INST_INBOUNDS_GEP_OLD = 30, + FUNC_CODE_INST_INDIRECTBR = 31, + FUNC_CODE_DEBUG_LOC_AGAIN = 33, + FUNC_CODE_INST_CALL = 34, + FUNC_CODE_DEBUG_LOC = 35, + FUNC_CODE_INST_FENCE = 36, + FUNC_CODE_INST_CMPXCHG_OLD = 37, + FUNC_CODE_INST_ATOMICRMW = 38, + FUNC_CODE_INST_RESUME = 39, + FUNC_CODE_INST_LOADATOMIC = 41, + FUNC_CODE_INST_GEP = 43, + FUNC_CODE_INST_STORE = 44, + FUNC_CODE_INST_STOREATOMIC = 45, + FUNC_CODE_INST_CMPXCHG = 46, + FUNC_CODE_INST_LANDINGPAD = 47, +}; + +enum dxil_bc_value_symtab_code +{ + VST_CODE_ENTRY = 1, + VST_CODE_BBENTRY = 2, +}; + +enum dxil_bc_metadata_code +{ + METADATA_STRING = 1, + METADATA_VALUE = 2, + METADATA_NODE = 3, + METADATA_NAME = 4, + METADATA_DISTINCT_NODE = 5, + METADATA_KIND = 6, + METADATA_LOCATION = 7, + METADATA_OLD_NODE = 8, + METADATA_OLD_FN_NODE = 9, + METADATA_NAMED_NODE = 10, + METADATA_ATTACHMENT = 11, + METADATA_GENERIC_DEBUG = 12, + METADATA_SUBRANGE = 13, + METADATA_ENUMERATOR = 14, + METADATA_BASIC_TYPE = 15, + METADATA_FILE = 16, + METADATA_DERIVED_TYPE = 17, + METADATA_COMPOSITE_TYPE = 18, + METADATA_SUBROUTINE_TYPE = 19, + METADATA_COMPILE_UNIT = 20, + METADATA_SUBPROGRAM = 21, + METADATA_LEXICAL_BLOCK = 22, + METADATA_LEXICAL_BLOCK_FILE = 23, + METADATA_NAMESPACE = 24, + METADATA_TEMPLATE_TYPE 
= 25, + METADATA_TEMPLATE_VALUE = 26, + METADATA_GLOBAL_VAR = 27, + METADATA_LOCAL_VAR = 28, + METADATA_EXPRESSION = 29, + METADATA_OBJC_PROPERTY = 30, + METADATA_IMPORTED_ENTITY = 31, + METADATA_MODULE = 32, +}; + +enum dxil_bc_type_code +{ + TYPE_CODE_NUMENTRY = 1, + TYPE_CODE_VOID = 2, + TYPE_CODE_FLOAT = 3, + TYPE_CODE_DOUBLE = 4, + TYPE_CODE_LABEL = 5, + TYPE_CODE_OPAQUE = 6, + TYPE_CODE_INTEGER = 7, + TYPE_CODE_POINTER = 8, + TYPE_CODE_FUNCTION_OLD = 9, + TYPE_CODE_HALF = 10, + TYPE_CODE_ARRAY = 11, + TYPE_CODE_VECTOR = 12, + TYPE_CODE_X86_FP80 = 13, + TYPE_CODE_FP128 = 14, + TYPE_CODE_PPC_FP128 = 15, + TYPE_CODE_METADATA = 16, + TYPE_CODE_X86_MMX = 17, + TYPE_CODE_STRUCT_ANON = 18, + TYPE_CODE_STRUCT_NAME = 19, + TYPE_CODE_STRUCT_NAMED = 20, + TYPE_CODE_FUNCTION = 21, +}; + +enum dxil_bc_uselist_code +{ + USELIST_CODE_DEFAULT = 1, + USELIST_CODE_BB = 2, +}; + +enum dxil_bc_block_abbreviation +{ + END_BLOCK = 0, + ENTER_SUBBLOCK = 1, + DEFINE_ABBREV = 2, + UNABBREV_RECORD = 3, +}; + +enum dxil_bc_abbrev_type +{ + ABBREV_FIXED = 1, + ABBREV_VBR = 2, + ABBREV_ARRAY = 3, + ABBREV_CHAR = 4, + ABBREV_BLOB = 5, +}; + +enum dxil_bc_parameter_attribute_kind +{ + ATTR_KIND_WELL_KNOWN_VOID = 0, + ATTR_KIND_WELL_KNOWN_INT = 1, + ATTR_KIND_STRING = 2, + ATTR_KIND_STRING_STRING = 3, +}; + +enum dxil_bc_parameter_attribute_key +{ + ATTR_NODUPLICATE = 12, + ATTR_NOUNWIND = 18, + ATTR_READNONE = 20, + ATTR_READONLY = 21, +}; + +enum dxil_bc_linkage +{ + LINKAGE_EXTERNAL = 0, + LINKAGE_APPENDING = 2, + LINKAGE_INTERNAL = 3, +}; + +enum dxil_address_space +{ + AS_DEFAULT = 0, + AS_MEMORY = 1, + AS_CBUFFER = 2, + AS_GROUPSHARED = 3, +}; + +enum dxil_binop_code +{ + BINOP_ADD = 0, + BINOP_SUB = 1, + BINOP_MUL = 2, + BINOP_UDIV = 3, + BINOP_SDIV = 4, + BINOP_UREM = 5, + BINOP_SREM = 6, + BINOP_SHL = 7, + BINOP_LSHR = 8, + BINOP_ASHR = 9, + BINOP_AND = 10, + BINOP_OR = 11, + BINOP_XOR = 12 +}; + +enum dxil_overflowing_binop_flag_shift +{ + OBO_NO_UNSIGNED_WRAP = 0, + 
OBO_NO_SIGNED_WRAP = 1, +}; + +enum dxil_possibly_exact_binop_flag_shift +{ + PEO_EXACT = 0, +}; + +enum dxil_fast_fp_flag +{ + FAST_FP_UNSAFE_ALGEBRA = (1 << 0), + FAST_FP_NO_NANS = (1 << 1), + FAST_FP_NO_INFS = (1 << 2), + FAST_FP_NO_SIGNED_ZEROS = (1 << 3), + FAST_FP_ALLOW_RECIPROCAL = (1 << 4), +}; + +enum dxil_cast_code +{ + CAST_TRUNC = 0, + CAST_ZEXT = 1, + CAST_SEXT = 2, + CAST_FPTOUI = 3, + CAST_FPTOSI = 4, + CAST_UITOFP = 5, + CAST_SITOFP = 6, + CAST_FPTRUNC = 7, + CAST_FPEXT = 8, + CAST_PTRTOINT = 9, + CAST_INTTOPTR = 10, + CAST_BITCAST = 11, + CAST_ADDRSPACECAST = 12 +}; + +enum dxil_rmw_op +{ + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10, +}; + +enum dxil_atomic_ordering +{ + ORDERING_NOTATOMIC = 0, + ORDERING_UNORDERED = 1, + ORDERING_MONOTONIC = 2, + ORDERING_ACQUIRE = 3, + ORDERING_RELEASE = 4, + ORDERING_ACQREL = 5, + ORDERING_SEQCST = 6, +}; + +enum dxil_synchronisation_scope +{ + SYNCHSCOPE_SINGLETHREAD = 0, + SYNCHSCOPE_CROSSTHREAD = 1, +}; + +enum dxil_call_opcode +{ + CALL_TEMP_REG_LOAD = 0, + CALL_TEMP_REG_STORE = 1, + CALL_MIN_PREC_XREG_LOAD = 2, + CALL_MIN_PREC_XREG_STORE = 3, + CALL_LOAD_INPUT = 4, + CALL_STORE_OUTPUT = 5, + CALL_FABS = 6, + CALL_SATURATE = 7, + CALL_ISNAN = 8, + CALL_ISINF = 9, + CALL_ISFINITE = 10, + CALL_ISNORMAL = 11, + CALL_COS = 12, + CALL_SIN = 13, + CALL_TAN = 14, + CALL_ACOS = 15, + CALL_ASIN = 16, + CALL_ATAN = 17, + CALL_HCOS = 18, + CALL_HSIN = 19, + CALL_HTAN = 20, + CALL_EXP = 21, + CALL_FRC = 22, + CALL_LOG = 23, + CALL_SQRT = 24, + CALL_RSQRT = 25, + CALL_ROUND_NE = 26, + CALL_ROUND_NI = 27, + CALL_ROUND_PI = 28, + CALL_ROUND_Z = 29, + CALL_BFREV = 30, + CALL_COUNT_BITS = 31, + CALL_FIRST_BIT_LO = 32, + CALL_FIRST_BIT_HI = 33, + CALL_FIRST_BIT_SHI = 34, + CALL_FMAX = 35, + CALL_FMIN = 36, + CALL_IMAX = 37, + CALL_IMIN = 38, + CALL_UMAX = 39, + CALL_UMIN = 40, + CALL_IMUL = 41, + 
CALL_UMUL = 42, + CALL_UDIV = 43, + CALL_UADDC = 44, + CALL_USUBB = 45, + CALL_FMAD = 46, + CALL_FMA = 47, + CALL_IMAD = 48, + CALL_UMAD = 49, + CALL_MSAD = 50, + CALL_IBFE = 51, + CALL_UBFE = 52, + CALL_BFI = 53, + CALL_DOT2 = 54, + CALL_DOT3 = 55, + CALL_DOT4 = 56, + CALL_CREATE_HANDLE = 57, + CALL_CBUFFER_LOAD = 58, + CALL_CBUFFER_LOAD_LEGACY = 59, + CALL_SAMPLE = 60, + CALL_SAMPLE_B = 61, + CALL_SAMPLE_LOD = 62, + CALL_SAMPLE_GRAD = 63, + CALL_SAMPLE_C = 64, + CALL_SAMPLE_C_LZ = 65, + CALL_TEXTURE_LOAD = 66, + CALL_TEXTURE_STORE = 67, + CALL_BUFFER_LOAD = 68, + CALL_BUFFER_STORE = 69, + CALL_BUFFER_UPDATE_COUNTER = 70, + CALL_CHECK_ACCESS_FULLY_MAPPED = 71, + CALL_GET_DIMENSIONS = 72, + CALL_TEXTURE_GATHER = 73, + CALL_TEXTURE_GATHER_CMP = 74, + CALL_TEX2DMS_GET_SAMPLE_POS = 75, + CALL_RT_GET_SAMPLE_POS = 76, + CALL_RT_GET_SAMPLE_COUNT = 77, + CALL_ATOMIC_BINOP = 78, + CALL_ATOMIC_CMP_XCHG = 79, + CALL_BARRIER = 80, + CALL_CALCULATE_LOD = 81, + CALL_DISCARD = 82, + CALL_DERIV_COARSEX = 83, + CALL_DERIV_COARSEY = 84, + CALL_DERIV_FINEX = 85, + CALL_DERIV_FINEY = 86, + CALL_EVAL_SNAPPED = 87, + CALL_EVAL_SAMPLE_INDEX = 88, + CALL_EVAL_CENTROID = 89, + CALL_SAMPLE_INDEX = 90, + CALL_COVERAGE = 91, + CALL_INNER_COVERAGE = 92, + CALL_THREAD_ID = 93, + CALL_GROUP_ID = 94, + CALL_THREAD_ID_IN_GROUP = 95, + CALL_FLATTENED_THREAD_ID_IN_GROUP = 96, + CALL_EMIT_STREAM = 97, + CALL_CUT_STREAM = 98, + CALL_EMIT_THEN_CUT_STREAM = 99, + CALL_GS_INSTANCE_ID = 100, + CALL_MAKE_DOUBLE = 101, + CALL_SPLIT_DOUBLE = 102, + CALL_LOAD_OUTPUT_CONTROL_POINT = 103, + CALL_LOAD_PATCH_CONSTANT = 104, + CALL_DOMAIN_LOCATION = 105, + CALL_STORE_PATCH_CONSTANT = 106, + CALL_OUTPUT_CONTROL_POINT_ID = 107, + CALL_PRIMITIVE_ID = 108, + CALL_CYCLE_COUNTER_LEGACY = 109, + CALL_WAVE_IS_FIRST_LANE = 110, + CALL_WAVE_GET_LANE_INDEX = 111, + CALL_WAVE_GET_LANE_COUNT = 112, + CALL_WAVE_ANY_TRUE = 113, + CALL_WAVE_ALL_TRUE = 114, + CALL_WAVE_ACTIVE_ALL_EQUAL = 115, + CALL_WAVE_ACTIVE_BALLOT = 116, + 
CALL_WAVE_READ_LANE_AT = 117, + CALL_WAVE_READ_LANE_FIRST = 118, + CALL_WAVE_ACTIVE_OP = 119, + CALL_WAVE_ACTIVE_BIT = 120, + CALL_WAVE_PREFIX_OP = 121, + CALL_QUAD_READ_LANE_AT = 122, + CALL_QUAD_OP = 123, + CALL_BITCAST_I16TOF16 = 124, + CALL_BITCAST_F16TOI16 = 125, + CALL_BITCAST_I32TOF32 = 126, + CALL_BITCAST_F32TOI32 = 127, + CALL_BITCAST_I64TOF64 = 128, + CALL_BITCAST_F64TOI64 = 129, + CALL_LEGACY_F32TOF16 = 130, + CALL_LEGACY_F16TOF32 = 131, + CALL_LEGACY_DTOF = 132, + CALL_LEGACY_DTOSI32 = 133, + CALL_LEGACY_DTOUI32 = 134, + CALL_WAVE_ALL_BIT_COUNT = 135, + CALL_WAVE_PREFIX_BIT_COUNT = 136, + CALL_ATTRIBUTE_AT_VERTEX = 137, + CALL_VIEW_ID = 138, + CALL_RAW_BUFFER_LOAD = 139, + CALL_RAW_BUFFER_STORE = 140, +}; + +enum dxil_predicate +{ + FCMP_FALSE = 0, + FCMP_OEQ = 1, + FCMP_OGT = 2, + FCMP_OGE = 3, + FCMP_OLT = 4, + FCMP_OLE = 5, + FCMP_ONE = 6, + FCMP_ORD = 7, + FCMP_UNO = 8, + FCMP_UEQ = 9, + FCMP_UGT = 10, + FCMP_UGE = 11, + FCMP_ULT = 12, + FCMP_ULE = 13, + FCMP_UNE = 14, + FCMP_TRUE = 15, + ICMP_EQ = 32, + ICMP_NE = 33, + ICMP_UGT = 34, + ICMP_UGE = 35, + ICMP_ULT = 36, + ICMP_ULE = 37, + ICMP_SGT = 38, + ICMP_SGE = 39, + ICMP_SLT = 40, + ICMP_SLE = 41, +}; + +enum dxil_resource_index +{ + RESOURCE_INDEX_SRV = 0, + RESOURCE_INDEX_UAV = 1, + RESOURCE_INDEX_CBV = 2, + RESOURCE_INDEX_SAMPLER = 3, +}; + +enum dxil_resource_kind +{ + RESOURCE_KIND_INVALID = 0, + RESOURCE_KIND_TEXTURE1D = 1, + RESOURCE_KIND_TEXTURE2D = 2, + RESOURCE_KIND_TEXTURE2DMS = 3, + RESOURCE_KIND_TEXTURE3D = 4, + RESOURCE_KIND_TEXTURECUBE = 5, + RESOURCE_KIND_TEXTURE1DARRAY = 6, + RESOURCE_KIND_TEXTURE2DARRAY = 7, + RESOURCE_KIND_TEXTURE2DMSARRAY = 8, + RESOURCE_KIND_TEXTURECUBEARRAY = 9, + RESOURCE_KIND_TYPEDBUFFER = 10, + RESOURCE_KIND_RAWBUFFER = 11, + RESOURCE_KIND_STRUCTUREDBUFFER = 12, + RESOURCE_KIND_CBUFFER = 13, + RESOURCE_KIND_SAMPLER = 14, + RESOURCE_KIND_TBUFFER = 15, + RESOURCE_KIND_RTACCELERATIONSTRUCTURE = 16, + RESOURCE_KIND_FEEDBACKTEXTURE2D = 17, + 
RESOURCE_KIND_FEEDBACKTEXTURE2DARRAY = 18, +}; + +enum dxil_resource_type +{ + RESOURCE_TYPE_NON_RAW_STRUCTURED = 0, + RESOURCE_TYPE_RAW_STRUCTURED = 1, +}; + +enum dxil_component_type +{ + COMPONENT_TYPE_INVALID = 0, + COMPONENT_TYPE_I1 = 1, + COMPONENT_TYPE_I16 = 2, + COMPONENT_TYPE_U16 = 3, + COMPONENT_TYPE_I32 = 4, + COMPONENT_TYPE_U32 = 5, + COMPONENT_TYPE_I64 = 6, + COMPONENT_TYPE_U64 = 7, + COMPONENT_TYPE_F16 = 8, + COMPONENT_TYPE_F32 = 9, + COMPONENT_TYPE_F64 = 10, + COMPONENT_TYPE_SNORMF16 = 11, + COMPONENT_TYPE_UNORMF16 = 12, + COMPONENT_TYPE_SNORMF32 = 13, + COMPONENT_TYPE_UNORMF32 = 14, + COMPONENT_TYPE_SNORMF64 = 15, + COMPONENT_TYPE_UNORMF64 = 16, + COMPONENT_TYPE_PACKEDS8X32 = 17, + COMPONENT_TYPE_PACKEDU8X32 = 18, +}; + +enum dxil_sampler_kind +{ + SAMPLER_KIND_DEFAULT = 0, + SAMPLER_KIND_COMPARISON = 1, + SAMPLER_KIND_MONO = 2, +}; + +enum dxil_interpolation_mode +{ + INTERPOLATION_UNDEFINED = 0, + INTERPOLATION_CONSTANT = 1, + INTERPOLATION_LINEAR = 2, + INTERPOLATION_LINEARCENTROID = 3, + INTERPOLATION_LINEARNOPERSPECTIVE = 4, + INTERPOLATION_LINEARNOPERSPECTIVECENTROID = 5, + INTERPOLATION_LINEARSAMPLE = 6, + INTERPOLATION_LINEARNOPERSPECTIVESAMPLE = 7, + INTERPOLATION_INVALID = 8, +}; + +enum dxil_semantic_kind +{ + SEMANTIC_KIND_ARBITRARY = 0, + SEMANTIC_KIND_VERTEXID = 1, + SEMANTIC_KIND_INSTANCEID = 2, + SEMANTIC_KIND_POSITION = 3, + SEMANTIC_KIND_RTARRAYINDEX = 4, + SEMANTIC_KIND_VIEWPORTARRAYINDEX = 5, + SEMANTIC_KIND_CLIPDISTANCE = 6, + SEMANTIC_KIND_CULLDISTANCE = 7, + SEMANTIC_KIND_OUTPUTCONTROLPOINTID = 8, + SEMANTIC_KIND_DOMAINLOCATION = 9, + SEMANTIC_KIND_PRIMITIVEID = 10, + SEMANTIC_KIND_GSINSTANCEID = 11, + SEMANTIC_KIND_SAMPLEINDEX = 12, + SEMANTIC_KIND_ISFRONTFACE = 13, + SEMANTIC_KIND_COVERAGE = 14, + SEMANTIC_KIND_INNERCOVERAGE = 15, + SEMANTIC_KIND_TARGET = 16, + SEMANTIC_KIND_DEPTH = 17, + SEMANTIC_KIND_DEPTHLESSEQUAL = 18, + SEMANTIC_KIND_DEPTHGREATEREQUAL = 19, + SEMANTIC_KIND_STENCILREF = 20, + 
SEMANTIC_KIND_DISPATCHTHREADID = 21, + SEMANTIC_KIND_GROUPID = 22, + SEMANTIC_KIND_GROUPINDEX = 23, + SEMANTIC_KIND_GROUPTHREADID = 24, + SEMANTIC_KIND_TESSFACTOR = 25, + SEMANTIC_KIND_INSIDETESSFACTOR = 26, + SEMANTIC_KIND_VIEWID = 27, + SEMANTIC_KIND_BARYCENTRICS = 28, + SEMANTIC_KIND_SHADINGRATE = 29, + SEMANTIC_KIND_CULLPRIMITIVE = 30, + SEMANTIC_KIND_INVALID = 31, +}; + +enum dxil_semantic_interpretation_kind +{ + SEMANTIC_INTERPRETATION_NOT_AVAILABLE = 0, + SEMANTIC_INTERPRETATION_NORMAL = 1, + SEMANTIC_INTERPRETATION_SYSTEM_GENERATED = 2, + SEMANTIC_INTERPRETATION_ARBITRARY = 3, + SEMANTIC_INTERPRETATION_NOT_INCLUDED = 4, + SEMANTIC_INTERPRETATION_NOT_PACKED = 5, + SEMANTIC_INTERPRETATION_TARGET = 6, + SEMANTIC_INTERPRETATION_TESS_FACTOR = 7, + SEMANTIC_INTERPRETATION_SHADOW = 8, + SEMANTIC_INTERPRETATION_CLIP_CULL = 9, +}; + +enum dxil_input_primitive +{ + INPUT_PRIMITIVE_UNDEFINED = 0, + INPUT_PRIMITIVE_POINT = 1, + INPUT_PRIMITIVE_LINE = 2, + INPUT_PRIMITIVE_TRIANGLE = 3, + INPUT_PRIMITIVE_LINEWITHADJACENCY = 6, + INPUT_PRIMITIVE_TRIANGLEWITHADJACENY = 7, +}; + +enum dxil_primitive_topology +{ + PRIMITIVE_TOPOLOGY_UNDEFINED = 0, + PRIMITIVE_TOPOLOGY_POINTLIST = 1, + PRIMITIVE_TOPOLOGY_LINELIST = 2, + PRIMITIVE_TOPOLOGY_LINESTRIP = 3, + PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, + PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, +}; + +enum dxil_shader_properties_tag +{ + SHADER_PROPERTIES_FLAGS = 0, + SHADER_PROPERTIES_GEOMETRY = 1, + SHADER_PROPERTIES_DOMAIN = 2, + SHADER_PROPERTIES_HULL = 3, + SHADER_PROPERTIES_COMPUTE = 4, +}; + +#endif /* __VKD3D_SM6_H */ diff --git a/libs/vkd3d-shader/trace.c b/libs/vkd3d-shader/trace.c index dc711f52..ac65c13c 100644 --- a/libs/vkd3d-shader/trace.c +++ b/libs/vkd3d-shader/trace.c @@ -355,6 +355,7 @@ struct vkd3d_d3d_asm_compiler struct vkd3d_string_buffer buffer; struct vkd3d_shader_version shader_version; struct vkd3d_d3d_asm_colours colours; + bool in_function; };
static int shader_ver_ge(const struct vkd3d_shader_version *v, int major, int minor) @@ -804,6 +805,17 @@ static void shader_print_hex_literal(struct vkd3d_d3d_asm_compiler *compiler, prefix, compiler->colours.literal, i, compiler->colours.reset, suffix); }
+static void shader_print_hex_typed_literal(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, uint64_t i, unsigned int width, const char *suffix) +{ + if (i) + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s0x%0*"PRIx64"%s%s", + prefix, compiler->colours.literal, width / 4, i, compiler->colours.reset, suffix); + else + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s0%s%s", + prefix, compiler->colours.literal, compiler->colours.reset, suffix); +} + static void shader_print_bool_literal(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, unsigned int b, const char *suffix) { @@ -1851,6 +1863,356 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, shader_addline(buffer, "\n"); }
+const char * const shader_sm6_opcode_names[] = +{ + [VKD3DSM6IH_CALL ] = "call", + [VKD3DSM6IH_DCL_CONSTANT ] = "constant", + [VKD3DSM6IH_DCL_FUNCTION ] = "function", + [VKD3DSM6IH_RET ] = "ret", +}; + +static bool shader_sm6_print_opcode(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_sm6_instruction *ins) +{ + static const char unrecognized[] = "<unrecognized>"; + const char *name = unrecognized; + + if (ins->handler_idx >= ARRAY_SIZE(shader_sm6_opcode_names)) + { + FIXME("Skipping unrecognized instruction.\n"); + vkd3d_string_buffer_printf(&compiler->buffer, "<unrecognized instruction>"); + return false; + } + + name = shader_sm6_opcode_names[ins->handler_idx]; + + assert(name); + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", compiler->colours.opcode, name, compiler->colours.reset); + + return true; +} + +static void shader_sm6_trace_type(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vkd3d_shader_sm6_type *type, const char *suffix); + +static void shader_sm6_trace_function_declaration(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vkd3d_shader_sm6_type *type, const char *fn_name, const char *suffix) +{ + unsigned int i; + + shader_sm6_trace_type(compiler, prefix, type->u.function->ret_type, ""); + vkd3d_string_buffer_printf(&compiler->buffer, " %s%s(", fn_name ? "@" : "", fn_name ? fn_name : ""); + for (i = 0; i < type->u.function->param_count; ++i) + shader_sm6_trace_type(compiler, i ? 
", " : "", type->u.function->param_types[i], ""); + vkd3d_string_buffer_printf(&compiler->buffer, ")%s", suffix); +} + +static void shader_sm6_trace_type(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vkd3d_shader_sm6_type *type, const char *suffix) +{ + const char *name; + + if (!type) + return; + switch (type->type) + { + case VKD3D_SM6_VOID: + vkd3d_string_buffer_printf(&compiler->buffer, "%svoid%s", prefix, suffix); + break; + case VKD3D_SM6_INTEGER: + vkd3d_string_buffer_printf(&compiler->buffer, "%si%u%s", prefix, type->u.width, suffix); + break; + case VKD3D_SM6_FLOAT: + switch (type->u.width) + { + case 16: + name = "half"; + break; + case 32: + name = "float"; + break; + case 64: + name = "double"; + break; + default: + name = "unknown"; + break; + } + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", prefix, name, suffix); + break; + case VKD3D_SM6_POINTER: + shader_sm6_trace_type(compiler, prefix, type->u.pointer.type, ""); + vkd3d_string_buffer_printf(&compiler->buffer, "*%s", suffix); + break; + case VKD3D_SM6_STRUCT: + name = type->u.struc->name ? 
type->u.struc->name : "<anon>"; + vkd3d_string_buffer_printf(&compiler->buffer, "%s%%%s%s", prefix, name, suffix); + break; + case VKD3D_SM6_FUNCTION: + shader_sm6_trace_function_declaration(compiler, prefix, type, NULL, suffix); + break; + case VKD3D_SM6_VECTOR: + case VKD3D_SM6_ARRAY: + vkd3d_string_buffer_printf(&compiler->buffer, "%s[%u x ", prefix, type->u.array.count); + shader_sm6_trace_type(compiler, "", type->u.array.elem_type, ""); + vkd3d_string_buffer_printf(&compiler->buffer, "]%s", suffix); + break; + default: + FIXME("Unhandled type %u.\n", type->type); + break; + } +} + +static void shader_sm6_trace_array_values(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vkd3d_shader_sm6_type *type, void *data, const char *suffix) +{ + unsigned int i, width; + bool is_null = !data; + uint64_t value; + union + { + uint8_t *p8; + uint16_t *p16; + uint32_t *p32; + uint64_t *p64; + float *f; + double *d; + } array; + + array.p8 = data; + width = type->u.array.elem_type->u.width; + + vkd3d_string_buffer_printf(&compiler->buffer, "%s[", prefix); + + switch (type->u.array.elem_type->type) + { + case VKD3D_SM6_FLOAT: + for (i = 0; i < type->u.array.count; ++i) + { + const char *prefix = i ? ", " : ""; + if (width == 32) + shader_print_float_literal(compiler, prefix, is_null ? 0.0f : array.f[i], ""); + else if (width == 64) + shader_print_double_literal(compiler, prefix, is_null ? 0.0 : array.d[i], ""); + else + shader_print_hex_typed_literal(compiler, prefix, is_null ? 0 : array.p16[i], width, ""); + } + break; + case VKD3D_SM6_INTEGER: + for (i = 0; i < type->u.array.count; ++i) + { + if (is_null) + value = 0; + else if (width == 16) + value = array.p16[i]; + else if (width == 32) + value = array.p32[i]; + else if (width == 64) + value = array.p64[i]; + else + value = array.p8[i]; + + shader_print_hex_typed_literal(compiler, i ? 
", " : "", value, width, ""); + } + break; + default: + break; + } + + vkd3d_string_buffer_printf(&compiler->buffer, "]%s", suffix); +} + +static void shader_sm6_trace_constant_value(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vkd3d_shader_sm6_instruction *ins, const char *suffix) +{ + const struct vkd3d_shader_sm6_type *type = ins->result_type; + + if (ins->u.var.is_undefined) + { + vkd3d_string_buffer_printf(&compiler->buffer, "%sundef%s", prefix, suffix); + return; + } + + switch (type->type) + { + case VKD3D_SM6_INTEGER: + if (type->u.width <= 32) + shader_print_int_literal(compiler, prefix, ins->u.var.u.uint_value, suffix); + else + shader_print_hex_typed_literal(compiler, prefix, ins->u.var.u.uint64_value, type->u.width, suffix); + break; + case VKD3D_SM6_FLOAT: + if (type->u.width == 16) + shader_print_hex_typed_literal(compiler, prefix, ins->u.var.u.half_value, type->u.width, suffix); + else if (type->u.width == 32) + shader_print_float_literal(compiler, prefix, ins->u.var.u.float_value, suffix); + else + shader_print_double_literal(compiler, prefix, ins->u.var.u.double_value, suffix); + break; + case VKD3D_SM6_ARRAY: + shader_sm6_trace_array_values(compiler, prefix, type, ins->u.var.u.aggregate_data.pvoid, suffix); + break; + default: + FIXME("Unhandled type %u.\n", type->type); + break; + } +} + +static void shader_sm6_trace_value(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vkd3d_shader_sm6_instruction *ins, const char *suffix) +{ + const char *name = shader_sm6_instruction_get_name(ins); + + if (!ins) + return; + if (name) + { + vkd3d_string_buffer_printf(&compiler->buffer, "%s@%s%s", prefix, name, suffix); + return; + } + + if (shader_sm6_instruction_is_constant(ins) && shader_sm6_type_is_numeric(ins->result_type)) + shader_sm6_trace_constant_value(compiler, prefix, ins, suffix); + else + vkd3d_string_buffer_printf(&compiler->buffer, "%s%%%u%s", prefix, ins->value_index + 1, suffix); +} + 
+static void shader_sm6_trace_type_value_pair(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const union vkd3d_shader_sm6_operand operand, const char *suffix) +{ + shader_sm6_trace_type(compiler, prefix, operand.ins->result_type, ""); + shader_sm6_trace_value(compiler, " ", operand.ins, suffix); +} + +/* Operand codes: + * a -> alignment + * e[,] -> pointee of result type (NOT checked) + * i -> value ref + * n -> literal uint + * r[,] -> result type + * t -> value type + * v -> "volatile" + * * -> repeat for remaining operands + */ +static void shader_sm6_trace_instruction(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_sm6_instruction *ins, const char *operands, const char *suffix) +{ + const char *separator = " "; + const char *op = operands; + unsigned int i; + + if (*op == 'r' || *op == 'e') + { + const struct vkd3d_shader_sm6_type *type = ins->result_type; + bool pointee = *op == 'e'; + bool comma = *++op == ','; + + op += comma; + if (type) + shader_sm6_trace_type(compiler, separator, pointee ? type->u.pointer.type : type, comma ? 
"," : ""); + else + vkd3d_string_buffer_printf(&compiler->buffer, "%svoid", separator); + } + + for (i = 0; i < ins->operand_count; ++i) + { + if (!*op) + { + FIXME("Invalid operand spec.\n"); + break; + } + + if (*op == 'i' || *op == 't') + { + if (*op == 't') + { + shader_sm6_trace_type(compiler, separator, ins->u.operands[i].ins->result_type, ""); + separator = " "; + } + shader_sm6_trace_value(compiler, separator, ins->u.operands[i].ins, ""); + } + else if (*op == 'a') + { + vkd3d_string_buffer_printf(&compiler->buffer, "%salign %u", separator, ins->u.operands[i].value); + } + else if (*op == 'n') + { + vkd3d_string_buffer_printf(&compiler->buffer, "%s%u", separator, ins->u.operands[i].value); + } + else if (*op == 'v') + { + if (ins->modifier) + vkd3d_string_buffer_printf(&compiler->buffer, "%svolatile", separator); + --i; + } + op += op[1] != '*'; + separator = ", "; + } + + vkd3d_string_buffer_printf(&compiler->buffer, "%s\n", suffix); +} + +static void shader_sm6_trace_function_call(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_sm6_instruction *ins) +{ + const struct vkd3d_shader_sm6_type *type; + unsigned int i; + + type = ins->u.operands[0].ins->result_type; + assert(type->type == VKD3D_SM6_POINTER); + type = type->u.pointer.type->u.function->ret_type; + shader_sm6_trace_type(compiler, " ", type, ""); + vkd3d_string_buffer_printf(&compiler->buffer, " @%s(", shader_sm6_instruction_get_name(ins->u.operands[0].ins)); + + for (i = 1; i < ins->operand_count; ++i) + shader_sm6_trace_type_value_pair(compiler, i > 1 ? 
", " : "", ins->u.operands[i], ""); + + vkd3d_string_buffer_printf(&compiler->buffer, ")"); + + vkd3d_string_buffer_printf(&compiler->buffer, "\n"); +} + +static bool shader_sm6_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_sm6_instruction *ins) +{ + bool result = true; + + if (compiler->in_function) + vkd3d_string_buffer_printf(&compiler->buffer, " "); + + if (!shader_sm6_type_is_void_safe(ins->result_type)) + vkd3d_string_buffer_printf(&compiler->buffer, "%%%u%s", ins->value_index + 1, " = "); + + result = shader_sm6_print_opcode(compiler, ins); + + switch (ins->handler_idx) + { + case VKD3DSM6IH_CALL: + shader_sm6_trace_function_call(compiler, ins); + break; + + case VKD3DSM6IH_DCL_CONSTANT: + shader_sm6_trace_type(compiler, " ", ins->result_type, ""); + shader_sm6_trace_constant_value(compiler, " ", ins, "\n"); + break; + + case VKD3DSM6IH_DCL_FUNCTION: + shader_sm6_trace_function_declaration(compiler, " ", ins->result_type->u.pointer.type, + ins->u.function.name, "\n"); + break; + + case VKD3DSM6IH_RET: + shader_sm6_trace_instruction(compiler, ins, "r", ""); + break; + + default: + vkd3d_string_buffer_printf(&compiler->buffer, "\n"); + break; + } + + return result; +} + static enum vkd3d_result vkd3d_dxbc_dump_instructions(struct vkd3d_shader_parser *parser, enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_d3d_asm_compiler *compiler) { @@ -1915,6 +2277,34 @@ static enum vkd3d_result vkd3d_dxbc_dump_instructions(struct vkd3d_shader_parser return result; }
+static enum vkd3d_result vkd3d_dxil_dump_module(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_d3d_asm_compiler *compiler) +{ + struct vkd3d_shader_sm6_module module; + bool result = true; + unsigned int i, j; + + vkd3d_shader_parser_read_module(parser, &module); + + for (i = 0; i < module.global_instruction_count; ++i) + result &= shader_sm6_dump_instruction(compiler, &module.global_instructions[i]); + for (i = 0; i < module.function_count; ++i) + { + const struct vkd3d_shader_sm6_function *function = &module.functions[i]; + shader_sm6_trace_function_declaration(compiler, "\ndefine ", function->declaration->result_type->u.pointer.type, + function->declaration->u.function.name, " {\n"); + compiler->in_function = true; + + for (j = 0; j < function->instruction_count; ++j) + result &= shader_sm6_dump_instruction(compiler, &function->instructions[j]); + + vkd3d_string_buffer_printf(&compiler->buffer, (i < module.function_count - 1) ? "}\n\n" : "}\n"); + compiler->in_function = false; + } + + return result ? VKD3D_OK : VKD3D_ERROR; +} + enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out) { @@ -1978,8 +2368,11 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, shader_get_type_prefix(shader_version->type), shader_version->major, shader_version->minor, compiler.colours.reset);
+ compiler.in_function = false; + vkd3d_shader_parser_reset(parser); - result = vkd3d_dxbc_dump_instructions(parser, formatting, &compiler); + result = (compiler.shader_version.major >= 6) ? vkd3d_dxil_dump_module(parser, formatting, &compiler) + : vkd3d_dxbc_dump_instructions(parser, formatting, &compiler);
if (parser->failed) result = VKD3D_ERROR_INVALID_SHADER; diff --git a/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d-shader/vkd3d_shader_main.c index 1575a004..edbb7c1b 100644 --- a/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d-shader/vkd3d_shader_main.c @@ -1045,6 +1045,13 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info vkd3d_shader_parser_reset(parser); }
+ if (parser->shader_version.major >= 6) + { + FIXME("DXIL scanning is not implemented yet.\n"); + ret = VKD3D_ERROR; + goto done; + } + while (!vkd3d_shader_parser_is_end(parser)) { vkd3d_shader_parser_read_instruction(parser, &instruction); diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index aca5606b..9cdfc842 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -414,6 +414,108 @@ enum vkd3d_shader_opcode VKD3DSIH_INVALID, };
+enum vkd3d_shader_sm6_opcode +{ + VKD3DSM6IH_CALL, + VKD3DSM6IH_DCL_CONSTANT, + VKD3DSM6IH_DCL_FUNCTION, + VKD3DSM6IH_RET, + + VKD3DSM6IH_INVALID, +}; + +enum vkd3d_shader_sm6_call_op +{ + VKD3D_CALL_OP_NONE, + + VKD3D_CALL_OP_ACOS, + VKD3D_CALL_OP_ASIN, + VKD3D_CALL_OP_ATAN, + VKD3D_CALL_OP_ATOMIC_BINOP, + VKD3D_CALL_OP_ATOMIC_CMP_XCHG, + VKD3D_CALL_OP_BARRIER, + VKD3D_CALL_OP_BUFFER_LOAD, + VKD3D_CALL_OP_BUFFER_STORE, + VKD3D_CALL_OP_CBUFFER_LOAD, + VKD3D_CALL_OP_CBUFFER_LOAD_LEGACY, + VKD3D_CALL_OP_COS, + VKD3D_CALL_OP_CREATE_HANDLE, + VKD3D_CALL_OP_DERIV_COARSEX, + VKD3D_CALL_OP_DERIV_COARSEY, + VKD3D_CALL_OP_DISCARD, + VKD3D_CALL_OP_DOMAIN_LOCATION, + VKD3D_CALL_OP_DOT2, + VKD3D_CALL_OP_DOT3, + VKD3D_CALL_OP_DOT4, + VKD3D_CALL_OP_EMIT_STREAM, + VKD3D_CALL_OP_EXP, + VKD3D_CALL_OP_FABS, + VKD3D_CALL_OP_FIRST_BIT_HI, + VKD3D_CALL_OP_FIRST_BIT_LO, + VKD3D_CALL_OP_FIRST_BIT_SHI, + VKD3D_CALL_OP_FLATTENED_THREAD_ID_IN_GROUP, + VKD3D_CALL_OP_FMA, + VKD3D_CALL_OP_FMAD, + VKD3D_CALL_OP_FMAX, + VKD3D_CALL_OP_FMIN, + VKD3D_CALL_OP_FRC, + VKD3D_CALL_OP_GET_DIMENSIONS, + VKD3D_CALL_OP_GROUP_ID, + VKD3D_CALL_OP_IMAX, + VKD3D_CALL_OP_IMIN, + VKD3D_CALL_OP_ISFINITE, + VKD3D_CALL_OP_ISINF, + VKD3D_CALL_OP_ISNAN, + VKD3D_CALL_OP_ISNORMAL, + VKD3D_CALL_OP_LEGACY_F16TOF32, + VKD3D_CALL_OP_LEGACY_F32TOF16, + VKD3D_CALL_OP_LOAD_INPUT, + VKD3D_CALL_OP_LOAD_OUTPUT_CONTROL_POINT, + VKD3D_CALL_OP_LOAD_PATCH_CONSTANT, + VKD3D_CALL_OP_LOG, + VKD3D_CALL_OP_MAKE_DOUBLE, + VKD3D_CALL_OP_OUTPUT_CONTROL_POINT_ID, + VKD3D_CALL_OP_PRIMITIVE_ID, + VKD3D_CALL_OP_RAW_BUFFER_LOAD, + VKD3D_CALL_OP_RAW_BUFFER_STORE, + VKD3D_CALL_OP_ROUND_NE, + VKD3D_CALL_OP_ROUND_NI, + VKD3D_CALL_OP_ROUND_PI, + VKD3D_CALL_OP_ROUND_Z, + VKD3D_CALL_OP_RSQRT, + VKD3D_CALL_OP_SAMPLE, + VKD3D_CALL_OP_SAMPLE_B, + VKD3D_CALL_OP_SAMPLE_C, + VKD3D_CALL_OP_SAMPLE_C_LZ, + VKD3D_CALL_OP_SAMPLE_GRAD, + VKD3D_CALL_OP_SAMPLE_LOD, + VKD3D_CALL_OP_SATURATE, + VKD3D_CALL_OP_SIN, + VKD3D_CALL_OP_SPLIT_DOUBLE, + VKD3D_CALL_OP_SQRT, + 
VKD3D_CALL_OP_STORE_PATCH_CONSTANT, + VKD3D_CALL_OP_STORE_OUTPUT, + VKD3D_CALL_OP_TAN, + VKD3D_CALL_OP_TEXTURE_GATHER, + VKD3D_CALL_OP_TEXTURE_GATHER_CMP, + VKD3D_CALL_OP_TEXTURE_LOAD, + VKD3D_CALL_OP_TEXTURE_STORE, + VKD3D_CALL_OP_THREAD_ID, + VKD3D_CALL_OP_THREAD_ID_IN_GROUP, + VKD3D_CALL_OP_UMAX, + VKD3D_CALL_OP_UMIN, + VKD3D_CALL_OP_WAVE_ALL_BIT_COUNT, + VKD3D_CALL_OP_WAVE_GET_LANE_COUNT, + VKD3D_CALL_OP_WAVE_GET_LANE_INDEX, + VKD3D_CALL_OP_WAVE_IS_FIRST_LANE, + VKD3D_CALL_OP_WAVE_PREFIX_BIT_COUNT, + VKD3D_CALL_OP_WAVE_PREFIX_OP, + VKD3D_CALL_OP_WAVE_READ_LANE_AT, + VKD3D_CALL_OP_WAVE_READ_LANE_FIRST, + + VKD3D_CALL_OP_INVALID, +}; + enum vkd3d_shader_register_type { VKD3DSPR_TEMP = 0, @@ -770,6 +872,7 @@ struct vkd3d_shader_desc { const uint32_t *byte_code; size_t byte_code_size; + bool is_dxil; struct vkd3d_shader_signature input_signature; struct vkd3d_shader_signature output_signature; struct vkd3d_shader_signature patch_constant_signature; @@ -904,6 +1007,255 @@ struct vkd3d_shader_instruction } declaration; };
+enum vkd3d_shader_address_space +{ + VKD3D_AS_DEFAULT, + VKD3D_AS_DEVICEMEM, + VKD3D_AS_CBUFFER, + VKD3D_AS_GROUPSHARED, +}; + +struct vkd3d_shader_sm6_pointer_info +{ + const struct vkd3d_shader_sm6_type *type; + enum vkd3d_shader_address_space addr_space; +}; + +struct vkd3d_shader_sm6_struct_info +{ + const char *name; + bool is_packed; + unsigned int elem_count; + const struct vkd3d_shader_sm6_type *elem_types[]; +}; + +struct vkd3d_shader_sm6_function_info +{ + const struct vkd3d_shader_sm6_type *ret_type; + unsigned int param_count; + const struct vkd3d_shader_sm6_type *param_types[]; +}; + +struct vkd3d_shader_sm6_array_info +{ + unsigned int count; + const struct vkd3d_shader_sm6_type *elem_type; +}; + +enum shader_sm6_type +{ + VKD3D_SM6_VOID, + VKD3D_SM6_INTEGER, + VKD3D_SM6_FLOAT, + VKD3D_SM6_POINTER, + VKD3D_SM6_STRUCT, + VKD3D_SM6_FUNCTION, + VKD3D_SM6_VECTOR, + VKD3D_SM6_ARRAY, + VKD3D_SM6_OPAQUE, + VKD3D_SM6_LABEL, + VKD3D_SM6_METADATA, +}; + +struct vkd3d_shader_sm6_type +{ + unsigned int index; + enum shader_sm6_type type; + union + { + unsigned int width; + struct vkd3d_shader_sm6_pointer_info pointer; + struct vkd3d_shader_sm6_struct_info *struc; + struct vkd3d_shader_sm6_function_info *function; + struct vkd3d_shader_sm6_array_info array; + } u; +}; + +static inline bool shader_sm6_type_is_void_safe(const struct vkd3d_shader_sm6_type *type) +{ + return !type || type->type == VKD3D_SM6_VOID; +} + +static inline bool shader_sm6_type_is_integer(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER; +} + +static inline bool shader_sm6_type_is_i1(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER && type->u.width == 1; +} + +static inline bool shader_sm6_type_is_i32(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER && type->u.width == 32; +} + +static inline bool shader_sm6_type_is_i64(const struct vkd3d_shader_sm6_type *type) +{ + return 
type->type == VKD3D_SM6_INTEGER && type->u.width == 64; +} + +static inline bool shader_sm6_type_is_floating_point(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_FLOAT; +} + +static inline bool shader_sm6_type_is_numeric(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_INTEGER || type->type == VKD3D_SM6_FLOAT; +} + +static inline bool shader_sm6_type_is_pointer(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_POINTER; +} + +static inline bool shader_sm6_type_is_vector(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_VECTOR; +} + +static inline bool shader_sm6_type_is_array(const struct vkd3d_shader_sm6_type *type) +{ + return type->type == VKD3D_SM6_ARRAY; +} + +struct vkd3d_shader_sm6_parameter_attribute_entry +{ + unsigned int count; + uint64_t keys[]; +}; + +enum vkd3d_sm6_parameter_attribute_flag +{ + PARAM_ATTR_NODUPLICATE = 0x1, + PARAM_ATTR_NOUNWIND = 0x2, + PARAM_ATTR_READNONE = 0x4, + PARAM_ATTR_READONLY = 0x8, + + PARAM_ATTR_INVALID = 0x80000000, +}; + +struct vkd3d_shader_sm6_parameter_group +{ + uint64_t key; + unsigned int param_index; + enum vkd3d_sm6_parameter_attribute_flag attributes; +}; + +struct vkd3d_shader_sm6_function_data +{ + const struct vkd3d_shader_sm6_type *type; + const char *name; + bool is_prototype; + unsigned int attribs_id; +}; + +union vkd3d_shader_sm6_aggregate_data +{ + void *pvoid; + uint8_t *p8; + uint16_t *p16; + uint32_t *p32; + uint64_t *p64; +}; + +struct vkd3d_shader_sm6_variable +{ + bool is_null; + bool is_undefined; + bool is_constant; + bool is_external; + enum vkd3d_shader_address_space addr_space; + unsigned int alignment; + union + { + unsigned int value_idx; + const struct vkd3d_shader_sm6_instruction *ins; + } init; + const char *name; + union + { + uint32_t uint_value; + uint64_t uint64_value; + uint16_t half_value; + int16_t int16_value; + float float_value; + double double_value; + void 
*pointer_value; + union vkd3d_shader_sm6_aggregate_data aggregate_data; + } u; +}; + +union vkd3d_shader_sm6_operand +{ + const struct vkd3d_shader_sm6_instruction *ins; + unsigned int value; +}; + +#define VKD3D_SHADER_SM6_MAX_OPERANDS 18u + +struct vkd3d_shader_sm6_instruction +{ + unsigned int value_index; + enum vkd3d_shader_sm6_opcode handler_idx; + int sub_opcode; + int modifier; + const struct vkd3d_shader_sm6_type *result_type; + unsigned int operand_count; + union + { + struct vkd3d_shader_sm6_variable var; + struct vkd3d_shader_sm6_function_data function; + union vkd3d_shader_sm6_operand operands[VKD3D_SHADER_SM6_MAX_OPERANDS]; + union vkd3d_shader_sm6_operand *extended_operands; + } u; +}; + +unsigned int shader_sm6_instruction_get_uint_value(const struct vkd3d_shader_sm6_instruction *ins); + +static inline bool shader_sm6_instruction_is_constant(const struct vkd3d_shader_sm6_instruction *ins) +{ + return ins && ins->handler_idx == VKD3DSM6IH_DCL_CONSTANT; +} + + +static inline bool shader_sm6_instruction_is_function_dcl(const struct vkd3d_shader_sm6_instruction *ins) +{ + return ins && ins->handler_idx == VKD3DSM6IH_DCL_FUNCTION; +} + +static inline const char *shader_sm6_instruction_get_name(const struct vkd3d_shader_sm6_instruction *ins) +{ + if (ins->handler_idx == VKD3DSM6IH_DCL_FUNCTION) + return ins->u.function.name; + else if (ins->handler_idx == VKD3DSM6IH_DCL_CONSTANT) + return ins->u.var.name; + return NULL; +} + +struct vkd3d_shader_sm6_function +{ + const struct vkd3d_shader_sm6_instruction *declaration; + struct vkd3d_shader_sm6_instruction *instructions; + unsigned int instruction_count; + unsigned int block_count; + unsigned int base_value_index; + unsigned int value_count; +}; + +struct vkd3d_shader_sm6_module +{ + const struct vkd3d_shader_sm6_type *types; + unsigned int type_count; + unsigned int max_value_count; + struct vkd3d_shader_sm6_instruction *global_instructions; + unsigned int global_instruction_count; + const struct 
vkd3d_shader_sm6_function *functions; + unsigned int function_count; +}; + static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) { return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; @@ -934,6 +1286,7 @@ struct vkd3d_shader_parser struct vkd3d_shader_desc shader_desc; struct vkd3d_shader_version shader_version; const uint32_t *ptr; + unsigned int bitpos; const struct vkd3d_shader_parser_ops *ops; };
@@ -942,6 +1295,7 @@ struct vkd3d_shader_parser_ops void (*parser_reset)(struct vkd3d_shader_parser *parser); void (*parser_destroy)(struct vkd3d_shader_parser *parser); void (*parser_read_instruction)(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *instruction); + void (*parser_read_module)(const struct vkd3d_shader_parser *parser, struct vkd3d_shader_sm6_module *module); bool (*parser_is_end)(struct vkd3d_shader_parser *parser); };
@@ -969,6 +1323,12 @@ static inline void vkd3d_shader_parser_read_instruction(struct vkd3d_shader_pars parser->ops->parser_read_instruction(parser, instruction); }
+static inline void vkd3d_shader_parser_read_module(const struct vkd3d_shader_parser *parser, + struct vkd3d_shader_sm6_module *module) +{ + parser->ops->parser_read_module(parser, module); +} + static inline void vkd3d_shader_parser_reset(struct vkd3d_shader_parser *parser) { parser->ops->parser_reset(parser); @@ -1071,6 +1431,9 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_desc *shader_desc, + struct vkd3d_shader_parser **parser);
void free_shader_desc(struct vkd3d_shader_desc *desc);
@@ -1257,6 +1620,8 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, #define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') #define TAG_TEXT VKD3D_MAKE_TAG('T', 'E', 'X', 'T')
+#define BITCODE_MAGIC 0xdec04342 + struct dxbc_writer_section { uint32_t tag;
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/dxil.c:
+int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info,
struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_desc *shader_desc,
struct vkd3d_shader_parser **parser)
+{
- struct vkd3d_shader_sm6_parser *sm6;
- if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6) + align(shader_desc->byte_code_size, sizeof(uint32_t)))))
- {
ERR("Failed to allocate parser.\n");
return VKD3D_ERROR_OUT_OF_MEMORY;
- }
- sm6->p.shader_desc = *shader_desc;
- shader_desc = &sm6->p.shader_desc;
- /* LLVM bitcode should be 32-bit aligned, but this is not done in the DXBC container.
* Get an aligned copy to prevent unaligned access. */
Have you seen DXBC files with unaligned chunks in the wild? I have never, it seems that all chunks are always aligned to four bytes. So I would suggest to enforce this behavior in `parse_dxbc()` and avoid a copy here.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/dxbc.c:
return ret; }
- if (shader_desc->is_dxil)
- {
if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, shader_desc, parser)) < 0)
My feeling is that we shouldn't accept a SM6 shader if the user specified a SM4 input. We should provide a new `VKD3D_SHADER_SOURCE_DXBC_DXIL` value in `enum vkd3d_shader_source_type` and accept a SM6 shader if that value was specified.
Out of convenience, we could also add something like `VKD3D_SHADER_SOURCE_DXBC`, which would allow any DXBC shader to be accepted. And possibly generalize it even further in order to autodetect SM1 too. I don't know if this would be useful for something.
My feeling is that we shouldn't accept a SM6 shader if the user specified a SM4 input. We should provide a new `VKD3D_SHADER_SOURCE_DXBC_DXIL` value in `enum vkd3d_shader_source_type` and accept a SM6 shader if that value was specified.
Yes, agreed.
Out of convenience, we could also add something like `VKD3D_SHADER_SOURCE_DXBC`, which would allow any DXBC shader to be accepted. And possibly generalize it even further in order to autodetect SM1 too. I don't know if this would be useful for something.
Not at this level. We do have autodetection code in vkd3d-compiler, and an argument could perhaps be made for making vkd3d-shader API available for that, but we probably don't want to do that in vkd3d_shader_compile()/vkd3d_shader_scan().
I've only had a somewhat cursory look at this series, but I did notice a couple of higher level things:
- This uses vkd3d_shader_parser, but doesn't actually use the existing vkd3d_shader_parser interfaces. Most notably, this doesn't implement parser_read_instruction, and instead introduces parser_read_module. Changing the vkd3d_shader_parser interface to accommodate DXIL is fine of course, but then the other users/implementations of that interface need to be adjusted as well. Similarly, we don't want types specific to a particular implementation like vkd3d_shader_sm6_module in that interface.
- Somewhat similarly, this introduces variants of existing structures and enumerations like vkd3d_shader_instruction and vkd3d_shader_opcode. Perhaps that makes sense, but it's not obvious from this series what the motivation behind that is, or that this couldn't be reconciled with the existing infrastructure.
- The individual patches in this series introduce quite a lot of code at once, and it doesn't seem too hard to split these patches.
- There's a conspicuous lack of vkd3d_shader_error() or similar calls in these patches. I.e., this generally uses ERR/FIXME/WARN for messages that should instead get propagated to the application.
Have you seen DXBC files with unaligned chunks in the wild? I have never, it seems that all chunks are always aligned to four bytes. So I would suggest to enforce this behavior in `parse_dxbc()` and avoid a copy here.
Yes, this seems suspicious. Even if this does (legitimately) happen in practice, it seems preferable to only make this kind of copy when actually needed.
Also, I noticed that shaders compiled with dxc (both the Linux version I compiled from upstream and the version used by https://shader-playground.timjones.io/, which appears to be running on Windows) have zero checksum: ``` warn:parse_dxbc Checksum {0x00000000, 0x00000000, 0x00000000, 0x00000000} does not match calculated checksum {0xe7148a8f, 0x5904ee85, 0x1cb3ac97, 0x25c380e9}. ``` Does this happen to you? Unless there is something I am missing, we might want to avoid treating this circumstance as a fatal error.
Also, I noticed that shaders compiled with dxc (both the Linux version I compiled from upstream and the version used by https://shader-playground.timjones.io/, which appears to be running on Windows) have zero checksum:
warn:parse_dxbc Checksum {0x00000000, 0x00000000, 0x00000000, 0x00000000} does not match calculated checksum {0xe7148a8f, 0x5904ee85, 0x1cb3ac97, 0x25c380e9}.
Does this happen to you? Unless there is something I am missing, we might want to avoid treating this circumstance as a fatal error.
AFAIK generating the checksum requires dxil.dll to be available to the compiler. I don't think (native) Direct3D 12 will accept shaders with invalid checksums, but we could certainly introduce a compile option for vkd3d-shader and vkd3d-compiler to ignore DXBC checksums.
On Tue Oct 25 11:40:43 2022 +0000, Henri Verbeet wrote:
Have you seen DXBC files with unaligned chunks in the wild? I have
never, it seems that all chunks are always aligned to four bytes. So I would suggest to enforce this behavior in `parse_dxbc()` and avoid a copy here. Yes, this seems suspicious. Even if this does (legitimately) happen in practice, it seems preferable to only make this kind of copy when actually needed.
177 of 568 in my Cyberpunk shader collection are misaligned, and some I built are too, but it's uncommon enough to justify only copying when needed.
On Mon Oct 31 01:36:27 2022 +0000, Giovanni Mascellani wrote:
Also, I noticed that shaders compiled with dxc (both the Linux version I compiled from upstream and the version used by https://shader-playground.timjones.io/, which appears to be running on Windows) have zero checksum:
warn:parse_dxbc Checksum {0x00000000, 0x00000000, 0x00000000, 0x00000000} does not match calculated checksum {0xe7148a8f, 0x5904ee85, 0x1cb3ac97, 0x25c380e9}.
Does this happen to you? Unless there is something I am missing, we might want to avoid treating this circumstance as a fatal error.
I added checksum writing to dxc (maybe there's a better way to make this available): [checksum hack](https://www.codeweavers.com/xfer/cmccarthy/dxc-add-checksum.diff)
On Tue Oct 25 11:40:38 2022 +0000, Henri Verbeet wrote:
My feeling is that we shouldn't accept a SM6 shader if the user
specified a SM4 input. We should provide a new `VKD3D_SHADER_SOURCE_DXBC_DXIL` value in `enum vkd3d_shader_source_type` and accept a SM6 shader if that value was specified. Yes, agreed.
Out of convenience, we could also add something like
`VKD3D_SHADER_SOURCE_DXBC`, which would allow any DXBC shader to be accepted. And possibly generalize it even further in order to autodetect SM1 too. I don't know if this would be useful for something. Not at this level. We do have autodetection code in vkd3d-compiler, and an argument could perhaps be made for making vkd3d-shader API available for that, but we probably don't want to do that in vkd3d_shader_compile()/vkd3d_shader_scan(). I've only had a somewhat cursory look at this series, but I did notice a couple of higher level things:
- This uses vkd3d_shader_parser, but doesn't actually use the existing
vkd3d_shader_parser interfaces. Most notably, this doesn't implement parser_read_instruction, and instead introduces parser_read_module. Changing the vkd3d_shader_parser interface to accommodate DXIL is fine of course, but then the other users/implementations of that interface need to be adjusted as well. Similarly, we don't want types specific to a particular implementation like vkd3d_shader_sm6_module in that interface.
- Somewhat similarly, this introduces variants of existing structures
and enumerations like vkd3d_shader_instruction and vkd3d_shader_opcode. Perhaps that makes sense, but it's not obvious from this series what the motivation behind that is, or that this couldn't be reconciled with the existing infrastructure.
- The individual patches in this series introduce quite a lot of code
at once, and it doesn't seem too hard to split these patches.
- There's a conspicuous lack of vkd3d_shader_error() or similar calls
in these patches. I.e., this generally uses ERR/FIXME/WARN for messages that should instead get propagated to the application.
In addition to instructions, much extra information needs to be passed out to the tracer or compiler, and parser_read_instruction is not practical for this. Instructions also need to reference other instructions, and the practice of writing one instruction to a struct would make that messy. Should vkd3d_shader_parser not be used at all for SM 6?
I initially was translating instructions to use vkd3d_shader_opcode, but since none can be handled by common code, we would end up using the same enum for two different things, with numerous values being used in one implementation but not the other. Also there would be inefficiencies, e.g. all numeric instructions with two inputs are emitted using a single BINOP with the actual operation encoded as an operand. We would translate this to one of the numerous VKD3DSIH equivalents, only to handle them all later in a single case statement.
It's possible to add another union member to vkd3d_shader_instruction for SM 6, but of the other fields only handler_idx, flags and src_count would be used. Use of common code would be rare; vkd3d_dxbc_compiler_map_ext_glsl_instruction() and vkd3d_dxbc_compiler_map_atomic_instruction() are the only two I found in a brief search, and the translation overhead probably cancels any gain.
I could reduce the large patch to nothing more than reading the block tree. This wouldn't do anything except validate the blocks and emit any errors encountered. Is that enough, or too much?
177 of 568 in my Cyberpunk shader collection are misaligned, and some I built are too, but it's uncommon enough to justify only copying when needed.
Do we know what causes this misalignment? Does dxc simply make no effort to align things while fxc does? Could you attach such a shader you compiled yourself here?
I added checksum writing to dxc (maybe there's a better way to make this available): [checksum hack](https://www.codeweavers.com/xfer/cmccarthy/dxc-add-checksum.diff)
I don't think we want to modify dxc like that, no.
In addition to instructions, much extra information needs to be passed out to the tracer or compiler, and parser_read_instruction is not practical for this. Instructions also need to reference other instructions, and the practice of writing one instruction to a struct would make that messy. Should vkd3d_shader_parser not be used at all for SM 6?
Well, a decision needs to be made. There are a couple of options:
- Use the existing vkd3d_shader_parser interface. This would of course be ideal; we'd be able to largely use the existing backends, perhaps with minor adjustments for things like handling additional instructions/capabilities. This seems unlikely to be feasible; still, it would be good to address the specific reasons behind that in the patch introducing whichever alternative we end up with.
- Don't use the vkd3d_shader_parser interface at all, and introduce something new only for DXIL. Perhaps this is unavoidable, but this would be the least desirable outcome, since it would imply needing DXIL-specific backends as well. That would most notably mean SPIR-V, but would have consequences for e.g. potential GLSL output as well.
- Extend/adjust the vkd3d_shader_parser interface. In particular, assuming for the sake of argument that the "parser_read_module" interface is the ideal interface for DXIL input, what are the reasons it would be unsuitable for dxbc-tpf and d3dbc input?
- Parse DXIL to the vkd3d_shader_sm6_module interface, and then do a lowering/flattening pass to turn it into a list of (likely extended) vkd3d_shader_instruction's.
The current patch essentially takes the approach of "pretend to use the vkd3d_shader_parser interface, but don't really", which I don't think is an appropriate option.
In any case, the patches you'll end up with will have to explain what specific issues they're addressing and what considerations went into the chosen approach, to someone potentially much less familiar with DXIL than you currently are. (And note that that person isn't necessarily "Henri, today"; this might be e.g. a new contributor reading the commit log 6 months from now, or perhaps yourself in a decade or so.)
It's perhaps also worth pointing out that this touches on ongoing conversations about the HLSL compiler's IRs. We currently essentially go from "HLSL IR" (almost) straight to d3dbc or dxbc-tpf bytecode. However, it turns out there may be some value in introducing an IR between that, on a level slightly higher than vkd3d_shader_instruction.
I initially was translating instructions to use vkd3d_shader_opcode, but since none can be handled by common code, we would end up using the same enum for two different things, with numerous values being used in one implementation but not the other. Also there would be inefficiencies, e.g. all numeric instructions with two inputs are emitted using a single BINOP with the actual operation encoded as an operand. We would translate this to one of the numerous VKD3DSIH equivalents, only to handle them all later in a single case statement.
It's possible to add another union member to vkd3d_shader_instruction for SM 6, but of the other fields only handler_idx, flags and src_count would be used. Use of common code would be rare; vkd3d_dxbc_compiler_map_ext_glsl_instruction() and vkd3d_dxbc_compiler_map_atomic_instruction() are the only two I found in a brief search, and the translation overhead probably cancels any gain.
Why is that? Or put a different way, what would prevent someone from transforming a list of vkd3d_shader_instruction's into a vkd3d_shader_sm6_module? And would that be worse than having to handle both in the backends?
I could reduce the large patch to nothing more than reading the block tree. This wouldn't do anything except validate the blocks and emit any errors encountered. Is that enough, or too much?
I'd have to see it; perhaps it would make sense to still split things further from there. It sounds like a decent start though.
It's perhaps also worth pointing out that this touches on ongoing conversations about the HLSL compiler's IRs. We currently essentially go from "HLSL IR" (almost) straight to d3dbc or dxbc-tpf bytecode. However, it turns out there may be some value in introducing an IR between that, on a level slightly higher than vkd3d_shader_instruction.
The discussion about that has mostly been along the lines of having separate sm1 and sm4 IR, although it's been vague and I think it's reasonable that we could use a common struct on the same level as vkd3d_shader_instruction instead. I don't think there's any reason that we want anything higher-level than that (with the possible caveat that maybe we want to use pointers rather than register numbers?), particularly because most of the impetus for introducing this new IR is that the current HLSL IR is *too* high-level (e.g. around things like variable loads and stores, and not being able to express the more CISC aspects of the smX assembly, like source modifiers.)
Maybe there's an argument to have one or more unified IRs across all of vkd3d-shader? It would be nice in some respects, but I imagine that compilation speed would be a concern. I gather that one reason that the dxbc->glsl/spirv path is arranged the way it is, is that we only want to do one pass over the dxbc, and want to avoid allocating memory as much as we can.
On Mon Oct 31 23:31:44 2022 +0000, Zebediah Figura wrote:
It's perhaps also worth pointing out that this touches on ongoing
conversations about the HLSL compiler's IRs. We currently essentially go from "HLSL IR" (almost) straight to d3dbc or dxbc-tpf bytecode. However, it turns out there may be some value in introducing an IR between that, on a level slightly higher than vkd3d_shader_instruction. The discussion about that has mostly been along the lines of having separate sm1 and sm4 IR, although it's been vague and I think it's reasonable that we could use a common struct on the same level as vkd3d_shader_instruction instead. I don't think there's any reason that we want anything higher-level than that (with the possible caveat that maybe we want to use pointers rather than register numbers?), particularly because most of the impetus for introducing this new IR is that the current HLSL IR is *too* high-level (e.g. around things like variable loads and stores, and not being able to express the more CISC aspects of the smX assembly, like source modifiers.) Maybe there's an argument to have one or more unified IRs across all of vkd3d-shader? It would be nice in some respects, but I imagine that compilation speed would be a concern. I gather that one reason that the dxbc->glsl/spirv path is arranged the way it is, is that we only want to do one pass over the dxbc, and want to avoid allocating memory as much as we can.
After a signature name is emitted, no realignment occurs. I've raised an issue: https://github.com/microsoft/DirectXShaderCompiler/issues/4755
The idea behind the vkd3d_shader_parser interface is not applicable to DXIL, and existing backends are unusable. The only resemblance DXIL bears to TPF is it ends up doing the same shader operations expressed in the HLSL source. It's much closer to SPIR-V because it uses static single assignment. Instructions don't use registers, and contain none of the information in the 'declaration' union in vkd3d_shader_instruction. This information is contained elsewhere. Reading instructions from a stream of 32-bit words until the end is reached doesn't make sense given the way DXIL is organised. Because vkd3d_shader_parser and the current backends are designed for doing that on a complex instruction set, it won't work for DXIL.
Code for converting TPF to DXIL would probably be at least as complex as the existing SPIR-V backend, possibly more so, and introduce many regressions.
Converting DXIL to TPF may be simpler, but there are still complications. DXIL doesn't use vectors; instead it extracts scalars from any load/sample/read operations which return vectors, and operates on the scalars. We would need to analyse the code and reconstruct vector inputs for each TPF instruction. PHI instructions would need to be replaced with temporary variables. But the most complex problem is building loop, if and else constructs. Hans-Kristian's structuriser probably builds most or all of the needed info but assembling it may turn out to be a major pain. I doubt this route would be less code than a separate backend either.
The discussion about that has mostly been along the lines of having separate sm1 and sm4 IR, although it's been vague and I think it's reasonable that we could use a common struct on the same level as vkd3d_shader_instruction instead. I don't think there's any reason that we want anything higher-level than that (with the possible caveat that maybe we want to use pointers rather than register numbers?), particularly because most of the impetus for introducing this new IR is that the current HLSL IR is *too* high-level (e.g. around things like variable loads and stores, and not being able to express the more CISC aspects of the smX assembly, like source modifiers.)
It doesn't need to be a much higher level as far as I'm concerned, but a few issues I can think of with the current scheme:
- Declarations being part of the same instruction stream as the rest of the shader can be a bit awkward; particularly for shader model 1-3 which doesn't necessarily have them. I suspect you're either going to run into this with your d3dbc->spirv efforts, or already have. In wined3d that's somewhat addressed by constructing the wined3d_shader_reg_maps structure and then ignoring declaration instructions in the GLSL backend.
- There are some differences in behaviour between the same instructions in various shader models. (E.g., mov/mova, exp/log/rcp/rsq, sincos, various tex* instructions.) We currently expect the backends to be aware of those, but it may be nicer to smooth over that in the frontends.
- Somewhat similar to declaration instructions, it probably makes sense to make hull shader phases available as separate blocks of instructions, instead of a single instruction stream.
- We typically end up parsing the same shader bytecode multiple times anyway. scan_with_parser() does a pass, vkd3d_shader_trace() may do a pass, and then the actual translation does a pass. (Though note that technically that's not a limitation of the interface as such; it would be entirely possible for a particular frontend to parse the shader only once and return existing instructions from parser_read_instruction().)
Note that I'm quite explicitly not suggesting to throw out vkd3d_shader_instruction and replacing it with something new; the suggestion is that the vkd3d_shader_instruction interface could likely be made to work for both HLSL and DXIL with a reasonable number of adjustments. If HLSL could use it as-is, that's all the better.
Maybe there's an argument to have one or more unified IRs across all of vkd3d-shader? It would be nice in some respects, but I imagine that compilation speed would be a concern. I gather that one reason that the dxbc->glsl/spirv path is arranged the way it is, is that we only want to do one pass over the dxbc, and want to avoid allocating memory as much as we can.
Well, things started out as directly translating shader model 1 bytecode to ARB_vertex_program/ARB_fragment_program instructions. (Compare e.g. IWineD3DVertexShaderImpl_GenerateProgramArbHW() from dlls/wined3d/vertexshader.c in wine-0.9.) Certain abstractions were introduced as needed; the introduction of the GLSL backend was an important event, as was the introduction of shader model 4 support. We've probably reached a similar point again, although this time it seems both DXIL and HLSL are getting there at roughly the same time.
The idea behind the vkd3d_shader_parser interface is not applicable to DXIL, and existing backends are unusable.
I suspect your view of what is and isn't the vkd3d_shader_parser interface may be a bit too rigid. This is an internal interface, and it's fairly malleable. It can be adjusted to the needs of new or existing frontends and backends, and different functionality can be moved around between the frontend and the backend, or into common helper functions where that makes sense.
The only resemblance DXIL bears to TPF is it ends up doing the same shader operations expressed in the HLSL source. It's much closer to SPIR-V because it uses static single assignment. Instructions don't use registers, and contain none of the information in the 'declaration' union in vkd3d_shader_instruction. This information is contained elsewhere. Reading instructions from a stream of 32-bit words until the end is reached doesn't make sense given the way DXIL is organised. Because vkd3d_shader_parser and the current backends are designed for doing that on a complex instruction set, it won't work for DXIL.
You mention a couple of concrete points here; that's great, it makes things a lot easier to discuss.
- SSA variables vs registers. Why is that a problem? As a first approximation, could we simply map these to VKD3DSPR_TEMP/VKD3DSPR_IDXTEMP? We have plenty of those at the IR level...
- Declaration instructions. See also my reply to Zeb further above. We could synthesise declaration instructions if we wanted to keep the current setup, but we may actually want to separate declarations from struct vkd3d_shader_instruction anyway, which would move us closer to the DXIL model.
- Reading single instructions. I touched on this in my reply to Zeb as well; this is not a restriction on the frontends. The interface is like this because it's sufficient for the current backends, but it could certainly make sense to return the entire program at once. We could even support both; going from one form to the other is largely just a trivial transformation. In any case, a frontend can of course choose to parse the entire shader at once, and only iterate over the parsed instructions in parser_read_instruction().
Code for converting TPF to DXIL would probably be at least as complex as the existing SPIR-V backend, possibly more so, and introduce many regressions.
In principle replacing the current SPIR-V backend with something of equal complexity in the frontend may still be a win if that means not having to handle two separate IRs in the backend(s). But sure; I don't think we'd want to actually do this, but there are no fundamental incompatibilities preventing it, right?
Converting DXIL to TPF may be simpler, but there are still complications. DXIL doesn't use vectors; instead it extracts scalars from any load/sample/read operations which return vectors, and operates on the scalars. We would need to analyse the code and reconstruct vector inputs for each TPF instruction. PHI instructions would need to be replaced with temporary variables. But the most complex problem is building loop, if and else constructs. Hans-Kristian's structuriser probably builds most or all of the needed info but assembling it may turn out to be a major pain. I doubt this route would be less code than a separate backend either.
- Vectorisation. I'm not sure we strictly need to do this. The backends should be fine with scalar instructions for things like basic arithmetic instructions and the like. And for things like dot products we'd currently need to do this anyway in the backend, right? Still, we kind of want a vectorisation pass for the HLSL compiler anyway...
- PHI instructions. Sure; on the IR level we could also just pass these through to the backend though.
- Control flow. Right, that's probably the most complex part of all this. It may not be any easier to do that in the frontend, but hopefully it shouldn't be much harder either? This too may be something the HLSL compiler already handles, although on the HLSL IR level. Alternatively, we could try introducing instruction blocks into the IR and just letting the backends handle it.
The discussion about that has mostly been along the lines of having separate sm1 and sm4 IR, although it's been vague and I think it's reasonable that we could use a common struct on the same level as vkd3d_shader_instruction instead. I don't think there's any reason that we want anything higher-level than that (with the possible caveat that maybe we want to use pointers rather than register numbers?), particularly because most of the impetus for introducing this new IR is that the current HLSL IR is *too* high-level (e.g. around things like variable loads and stores, and not being able to express the more CISC aspects of the smX assembly, like source modifiers.)
It doesn't need to be a much higher level as far as I'm concerned, but a few issues I can think of with the current scheme:
- Declarations being part of the same instruction stream as the rest of the shader can be a bit awkward; particularly for shader model 1-3 which doesn't necessarily have them. I suspect you're either going to run into this with your d3dbc->spirv efforts, or already have. In wined3d that's somewhat addressed by constructing the wined3d_shader_reg_maps structure and then ignoring declaration instructions in the GLSL backend.
In terms of translating sm1->spirv (and in general translating out of vkd3d_shader_instruction), sort of, although for the most part spirv.c should be capable of lazily initializing varyings, and most of my difficulty with sm1 thus far has been rearranging it so that it will. (And also so that it doesn't demand an input signature.)
In terms of translating hlsl->smX (and in general translating from something more high-level into vkd3d_shader_instruction), I don't think declarations are awkward at all?
I can definitely see this being an improvement—it'd make the spirv code for handling declarations less complicated—but as below, it'd also mean doing more passes, and probably more allocations as well.
- There are some differences in behaviour between the same instructions in various shader models. (E.g., mov/mova, exp/log/rcp/rsq, sincos, various tex* instructions.) We currently expect the backends to be aware of those, but it may be nicer to smooth over that in the frontends.
Agreed, we could definitely be doing more than we are right now.
- Somewhat similar to declaration instructions, it probably makes sense to make hull shader phases available as separate blocks of instructions, instead of a single instruction stream.
I can't much comment on this as I have thus far avoided touching or understanding tessellation. God only knows why it's so complicated...
- We typically end up parsing the same shader bytecode multiple times anyway. scan_with_parser() does a pass, vkd3d_shader_trace() may do a pass, and then the actual translation does a pass. (Though note that technically that's not a limitation of the interface as such; it would be entirely possible for a particular frontend to parse the shader only once and return existing instructions from parser_read_instruction().)
Hmm, right, I forgot we do two passes already. There's still basically no allocation from the smX frontends, though, and I think it'd be hard to improve on that...
Note that I'm quite explicitly not suggesting to throw out vkd3d_shader_instruction and replacing it with something new; the suggestion is that the vkd3d_shader_instruction interface could likely be made to work for both HLSL and DXIL with a reasonable number of adjustments. If HLSL could use it as-is, that's all the better.
Potentially. The main reason I haven't thus far is that it's extra work (if not per se a *lot* of extra work) for no clear benefit, versus our more ad-hoc struct smX_instruction infrastructure. If there were other frontends that wanted to generate sm1/sm4, or a reason for hlsl to feed directly to glsl or spirv instead of going through sm4 first, that'd more easily tip the scales.
If we do want to adopt it in more places, I support throwing out the vkd3d_shader_instruction naming and replacing it with something new :D
Maybe there's an argument to have one or more unified IRs across all of vkd3d-shader? It would be nice in some respects, but I imagine that compilation speed would be a concern. I gather that one reason that the dxbc->glsl/spirv path is arranged the way it is, is that we only want to do one pass over the dxbc, and want to avoid allocating memory as much as we can.
Well, things started out as directly translating shader model 1 bytecode to ARB_vertex_program/ARB_fragment_program instructions. (Compare e.g. IWineD3DVertexShaderImpl_GenerateProgramArbHW() from dlls/wined3d/vertexshader.c in wine-0.9.) Certain abstractions were introduced as needed; the introduction of the GLSL backend was an important event, as was the introduction of shader model 4 support. We've probably reached a similar point again, although this time it seems both DXIL and HLSL are getting there at roughly the same time.
I guess my point is, if we have a unified IR and it's basically vkd3d_shader_instruction, that's probably fine (and it may honestly be possible to do that, the way things are). But if we want to change vkd3d_shader_instruction, that might mean making the sm4->spirv path slower, which doesn't seem desirable.
On Wed Nov 2 05:57:57 2022 +0000, Henri Verbeet wrote:
The discussion about that has mostly been along the lines of having
separate sm1 and sm4 IR, although it's been vague and I think it's reasonable that we could use a common struct on the same level as vkd3d_shader_instruction instead. I don't think there's any reason that we want anything higher-level than that (with the possible caveat that maybe we want to use pointers rather than register numbers?), particularly because most of the impetus for introducing this new IR is that the current HLSL IR is *too* high-level (e.g. around things like variable loads and stores, and not being able to express the more CISC aspects of the smX assembly, like source modifiers.) It doesn't need to be a much higher level as far as I'm concerned, but a few issues I can think of with the current scheme:
- Declarations being part of the same instruction stream as the rest
of the shader can be a bit awkward; particularly for shader model 1-3 which doesn't necessarily have them. I suspect you're either going to run into this with your d3dbc->spirv efforts, or already have. In wined3d that's somewhat addressed by constructing the wined3d_shader_reg_maps structure and then ignoring declaration instructions in the GLSL backend.
- There are some differences in behaviour between the same
instructions in various shader models. (E.g., mov/mova, exp/log/rcp/rsq, sincos, various tex* instructions.) We currently expect the backends to be aware of those, but it may be nicer to smooth over that in the frontends.
- Somewhat similar to declaration instructions, it probably makes
sense to make hull shader phases available as separate blocks of instructions, instead of a single instruction stream.
- We typically end up parsing the same shader bytecode multiple times
anyway. scan_with_parser() does a pass, vkd3d_shader_trace() may do a pass, and then the actual translation does a pass. (Though note that technically that's not a limitation of the interface as such; it would be entirely possible for a particular frontend to parse the shader only once and return existing instructions from parser_read_instruction().) Note that I'm quite explicitly not suggesting to throw out vkd3d_shader_instruction and replacing it with something new; the suggestion is that the vkd3d_shader_instruction interface could likely be made to work for both HLSL and DXIL with a reasonable number of adjustments. If HLSL could use it as-is, that's all the better.
Maybe there's an argument to have one or more unified IRs across all
of vkd3d-shader? It would be nice in some respects, but I imagine that compilation speed would be a concern. I gather that one reason that the dxbc->glsl/spirv path is arranged the way it is, is that we only want to do one pass over the dxbc, and want to avoid allocating memory as much as we can. Well, things started out as directly translating shader model 1 bytecode to ARB_vertex_program/ARB_fragment_program instructions. (Compare e.g. IWineD3DVertexShaderImpl_GenerateProgramArbHW() from dlls/wined3d/vertexshader.c in wine-0.9.) Certain abstractions were introduced as needed; the introduction of the GLSL backend was an important event, as was the introduction of shader model 4 support. We've probably reached a similar point again, although this time it seems both DXIL and HLSL are getting there at roughly the same time.
The idea behind the vkd3d_shader_parser interface is not applicable to
DXIL, and existing backends are unusable. I suspect your view of what is and isn't the vkd3d_shader_parser interface may be a bit too rigid. This is an internal interface, and it's fairly malleable. It can be adjusted to the needs of new or existing frontends and backends, and different functionality can be moved around between the frontend and the backend, or into common helper functions where that makes sense.
The only resemblance DXIL bears to TPF is it ends up doing the same
shader operations expressed in the HLSL source. It's much closer to SPIR-V because it uses static single assignment. Instructions don't use registers, and contain none of the information in the 'declaration' union in vkd3d_shader_instruction. This information is contained elsewhere. Reading instructions from a stream of 32-bit words until the end is reached doesn't make sense given the way DXIL is organised. Because vkd3d_shader_parser and the current backends are designed for doing that on a complex instruction set, it won't work for DXIL. You mention a couple of concrete points here; that's great, it makes things a lot easier to discuss.
- SSA variables vs registers. Why is that a problem? As a first
approximation, could we simply map these to VKD3DSPR_TEMP/VKD3DSPR_IDXTEMP? We have plenty of those at the IR level...
- Declaration instructions. See also my reply to Zeb further above. We
could synthesise declaration instructions if we wanted to keep the current setup, but we may actually want to separate declarations from struct vkd3d_shader_instruction anyway, which would move us closer to the DXIL model.
- Reading single instructions. I touched on this in my reply to Zeb as
well; this is not a restriction on the frontends. The interface is like this because it's sufficient for the current backends, but it could certainly make sense to return the entire program at once. We could even support both; going from one form to the other is largely just a trivial transformation. In any case, a frontend can of course choose to parse the entire shader at once, and only iterate over the parsed instructions in parser_read_instruction().
Code for converting TPF to DXIL would probably be at least as complex
as the existing SPIR-V backend, possibly more so, and introduce many regressions. In principle replacing the current SPIR-V backend with something of equal complexity in the frontend may still be a win if that means not having to handle two separate IRs in the backend(s). But sure; I don't think we'd want to actually do this, but there are no fundamental incompatibilities preventing it, right?
Converting DXIL to TPF may be simpler, but there are still
complications. DXIL doesn't use vectors; instead it extracts scalars from any load/sample/read operations which return vectors, and operates on the scalars. We would need to analyse the code and reconstruct vector inputs for each TPF instruction. PHI instructions would need to be replaced with temporary variables. But the most complex problem is building loop, if and else constructs. Hans-Kristian's structuriser probably builds most or all of the needed info but assembling it may turn out to be a major pain. I doubt this route would be less code than a separate backend either.
- Vectorisation. I'm not sure we strictly need to do this. The
backends should be fine with scalar instructions for things like basic arithmetic instructions and the like. And for things like dot products we'd currently need to do this anyway in the backend, right? Still, we kind of want a vectorisation pass for the HLSL compiler anyway...
- PHI instructions. Sure; on the IR level we could also just pass
these through to the backend though.
- Control flow. Right, that's probably the most complex part of all
this. It may not be any easier to do that in the frontend, but hopefully it shouldn't be much harder either? This too may be something the HLSL compiler already handles, although on the HLSL IR level. Alternatively, we could try introducing instruction blocks into the IR and just letting the backends handle it.
After reviewing the possibilities, I think converting TPF to the DXIL-like IR is a far better option in the long run, at least for SM6 (I'm not sure how this would impact HLSL work). For reference, Microsoft has released code for such a conversion. This option would mean having an extra SM6 SPIR-V backend for a while at least, until the conversion code is written.
Using the current SPIR-V backend means converting DXIL to something which pretends to be TPF-like but actually isn't. For one thing it would have poorer performance. For example, in the new backend, loading a source mostly involves simply reading value->spirv_id, except when a constant must be emitted which only happens once. The existing backend has a significant overhead in vkd3d_dxbc_compiler_emit_load_reg(). A similar situation occurs for storing dst. Plus we'd have the initial overhead of DXIL conversion. In future we expect more and more use of SM 6, and if we encounter performance issues in the existing backend they may be hard to solve.
Inputs/outputs are another issue. Private variables are unnecessary in the new backend, and overall the I/O code is relatively simple compared to the existing functions. Making I/O loads/stores work through the existing code is a problem I'd rather not deal with. Tessellation is another complication. There are no shader phase declarations.
If we want DXIL trace output to look similar to that from 'dxc -dumpbin' we need a separate trace backend. I'd argue this is more useful for debugging than emitting something very similar to the current TPF trace.
On Wed Nov 2 05:57:57 2022 +0000, Conor McCarthy wrote:
After reviewing the possibilities, I think converting TPF to the DXIL-like IR is a far better option in the long run, at least for SM6 (I'm not sure how this would impact HLSL work). For reference, Microsoft has released code for such a conversion. This option would mean having an extra SM6 SPIR-V backend for a while at least, until the conversion code is written. Using the current SPIR-V backend means converting DXIL to something which pretends to be TPF-like but actually isn't. For one thing it would have poorer performance. For example, in the new backend, loading a source mostly involves simply reading value->spirv_id, except when a constant must be emitted which only happens once. The existing backend has a significant overhead in vkd3d_dxbc_compiler_emit_load_reg(). A similar situation occurs for storing dst. Plus we'd have the initial overhead of DXIL conversion. In future we expect more and more use of SM 6, and if we encounter performance issues in the existing backend they may be hard to solve. Inputs/outputs are another issue. Private variables are unnecessary in the new backend, and overall the I/O code is relatively simple compared to the existing functions. Making I/O loads/stores work through the existing code is a problem I'd rather not deal with. Tessellation is another complication. There are no shader phase declarations. If we want DXIL trace output to look similar to that from 'dxc -dumpbin' we need a separate trace backend. I'd argue this is more useful for debugging than emitting something very similar to the current TPF trace.
Adding a register type VKD3DSPR_SSA may work reasonably well, combined with separate instructions for SM6 input/output and tessellation. I'd need to write enough code to compile a simple shader to get an idea of how it looks. The question of what to trace remains though.
After reviewing the possibilities, I think converting TPF to the DXIL-like IR is a far better option in the long run, at least for SM6 (I'm not sure how this would impact HLSL work). For reference, Microsoft has released code for such a conversion. This option would mean having an extra SM6 SPIR-V backend for a while at least, until the conversion code is written.
We generally try very hard to avoid big rewrites like that, and I'm not convinced this should be an exception. I think the right way to approach this would be to make an (exhaustive) list of issues with the current IR for the purpose of representing DXIL, as well as a corresponding list with potential solutions to those issues. (Ideally with concrete examples.) The main point would be to get everyone involved on the same page in terms of understanding the issues and possible solutions. You have significantly more experience with translating DXIL, and are probably aware of issues that the rest of us aren't; at the same time, we may be able to come up with solutions/approaches you hadn't considered yet.
If we want DXIL trace output to look similar to that from 'dxc -dumpbin' we need a separate trace backend. I'd argue this is more useful for debugging than emitting something very similar to the current TPF trace.
Sure, broadly. It could perhaps be argued whether this should be a completely separate backend or if this could instead be an alternate output mode for the existing backend, but the difference would largely be trivial.
It does however depend on the approach taken for the IR. Specifically, if we're going to do significant transformations on DXIL IR before turning it into vkd3d IR (e.g. vectorisation, eliminating PHI-instructions, running it through the structuriser), that implies it would make more sense to run the disassembler directly on the parsed DXIL IR, instead of on the vkd3d IR.
- Declarations being part of the same instruction stream as the rest of the shader can be a bit awkward; particularly for shader model 1-3 which doesn't necessarily have them. I suspect you're either going to run into this with your d3dbc->spirv efforts, or already have. In wined3d that's somewhat addressed by constructing the wined3d_shader_reg_maps structure and then ignoring declaration instructions in the GLSL backend.
In terms of translating sm1->spirv (and in general translating out of vkd3d_shader_instruction), sort of, although for the most part spirv.c should be capable of lazily initializing varyings, and most of my difficulty with sm1 thus far has been rearranging it so that it will. (And also so that it doesn't demand an input signature.)
I'd argue the frontend should just generate the required information during parsing. The "reg_maps" approach from wined3d would be one option; another option would be to generate dcl_ instructions as needed into a separate instruction stream and merge the two at the end, similar to how the SPIR-V backend has "global_stream" and "function_stream". Taking that option one step further, we could decide to not merge them, and that would then allow getting rid of the "after_declarations_section" flag in the SPIR-V backend.
In terms of translating hlsl->smX (and in general translating from something more high-level into vkd3d_shader_instruction), I don't think declarations are awkward at all?
Sure, it's probably fine for HLSL.
I can definitely see this being an improvement—it'd make the spirv code for handling declarations less complicated—but as below, it'd also mean doing more passes, and probably more allocations as well.
I don't think we'd end up with more passes, certainly not in total. It would probably mean allocating more memory, but I don't think it would be prohibitively more.
- Somewhat similar to declaration instructions, it probably makes sense to make hull shader phases available as separate blocks of instructions, instead of a single instruction stream.
I can't much comment on this as I have thus far avoided touching or understanding tessellation. God only knows why it's so complicated...
Mostly just for illustrative purposes, here's a random hull shader from my collection: ``` hs_5_0
hs_decls dcl_input_control_point_count 4 dcl_output_control_point_count 4 dcl_tessellator_domain domain_quad dcl_tessellator_partitioning partitioning_integer dcl_tessellator_output_primitive output_triangle_ccw dcl_globalFlags refactoringAllowed dcl_constantBuffer cb0[2], immediateIndexed
hs_fork_phase dcl_output_siv o0.x, finalQuadUeq0EdgeTessFactor mov o0.x, cb0[0].x ret
hs_fork_phase dcl_output_siv o1.x, finalQuadVeq0EdgeTessFactor mov o1.x, cb0[0].y ret
hs_fork_phase dcl_output_siv o2.x, finalQuadUeq1EdgeTessFactor mov o2.x, cb0[0].z ret
hs_fork_phase dcl_output_siv o3.x, finalQuadVeq1EdgeTessFactor mov o3.x, cb0[0].w ret
hs_fork_phase dcl_output_siv o4.x, finalQuadUInsideTessFactor mov o4.x, cb0[1].x ret
hs_fork_phase dcl_output_siv o5.x, finalQuadVInsideTessFactor mov o5.x, cb0[1].y ret ``` We end up turning phases into their own functions and then invoking them in vkd3d_dxbc_compiler_emit_hull_shader_main().
Note that I'm quite explicitly not suggesting to throw out vkd3d_shader_instruction and replacing it with something new; the suggestion is that the vkd3d_shader_instruction interface could likely be made to work for both HLSL and DXIL with a reasonable number of adjustments. If HLSL could use it as-is, that's all the better.
Potentially. The main reason I haven't thus far is that it's extra work (if not per se a *lot* of extra work) for no clear benefit, versus our more ad-hoc struct smX_instruction infrastructure. If there were other frontends that wanted to generate sm1/sm4, or a reason for hlsl to feed directly to glsl or spirv instead of going through sm4 first, that'd more easily tip the scales.
The assembler comes to mind. In terms of benefits, there would of course be not having to deal with 3 similar, but slightly different low-level IRs. Also, I think the discussion that prompted this was that there were passes the HLSL compiler would like to do on a common low-level IR, instead of either duplicating those passes for SM1/4 or doing them on the HLSL IR.
If we do want to adopt it in more places, I support throwing out the vkd3d_shader_instruction naming and replacing it with something new :D
Good suggestions are always welcome. :)
Maybe there's an argument to have one or more unified IRs across all of vkd3d-shader? It would be nice in some respects, but I imagine that compilation speed would be a concern. I gather that one reason that the dxbc->glsl/spirv path is arranged the way it is, is that we only want to do one pass over the dxbc, and want to avoid allocating memory as much as we can.
Well, things started out as directly translating shader model 1 bytecode to ARB_vertex_program/ARB_fragment_program instructions. (Compare e.g. IWineD3DVertexShaderImpl_GenerateProgramArbHW() from dlls/wined3d/vertexshader.c in wine-0.9.) Certain abstractions were introduced as needed; the introduction of the GLSL backend was an important event, as was the introduction of shader model 4 support. We've probably reached a similar point again, although this time it seems both DXIL and HLSL are getting there at roughly the same time.
I guess my point is, if we have a unified IR and it's basically vkd3d_shader_instruction, that's probably fine (and it may honestly be possible to do that, the way things are). But if we want to change vkd3d_shader_instruction, that might mean making the sm4->spirv path slower, which doesn't seem desirable.
Sure, the intention would be for these changes to be fairly minor. Though in terms of shader compilation speed, the SPIR-V->GPU part is likely much worse than anything we could do to the vkd3d IR. There are also things in the existing code, like get_opcode_info() that could be improved.
Here's one RFC regarding use of vkd3d_shader_instruction (still working on a new name for that) in the HLSL compiler (and, at the same time, elsewhere): how CISC should the IR be? I'm thinking about things like source/dest modifiers, complex instructions like "div", restrictions on what register types can be used in the same instruction, and so on. I see four options:
(1) Maximally CISC, i.e. as CISC as the union of sm1, sm4, sm6, and anything else we translate into it. The disadvantage of this is that this means we're going to have to have specific hlsl backends for sm1 and sm4 anyway (this may be unavoidable, frankly, but it at least increases the extent of that code), which conceptually defeats the point of using a common IR—all we really get out of it is structure definitions. This is basically the current state as far as HLSL is concerned, except we don't even use the structure definitions.
(2) Minimally CISC, i.e. as CISC as the intersection of sm1 and sm4 [and sm6?], or less. This is great for HLSL, and fits the model of vkd3d_shader_instruction as a generic IR, but it means that we need to modify the instruction stream in some nontrivial ways when translating dxbc -> vazir. (Although maybe not that nontrivial? All we really need to do is split up some instructions, and that can be done as we read—i.e. it doesn't require inserting into the middle of an already built instruction list.) The other disadvantage is that we can't use vkd3d_shader_instruction for disassembly anymore.
(3) Maximally CISC, but we don't use the whole CISC set when translating from vkd3d-shader, and instead do a lot of peepholing *after* translating into vkd3d_shader_instruction. This is conceptually nice as far as the HLSL -> smX translation is concerned, and avoids the potential overhead of (2), but I feel like it also adds a bit of extra mental burden on understanding what's legal IR for the HLSL compiler to output.
(4) Maximally CISC, but we lower some instructions into multiple instructions when converting out of vkd3d_shader_instruction. This avoids all the disadvantages mentioned thus far, but then means that adding a new backend might imply modifying all the other backends to explicitly lower or fail on new CISC constructions.
I think (2) is my favourite solution. I was originally worried about the overhead of modifying the instruction stream, but then I realized that we don't really need to modify it, and it doesn't really increase the number of IR passes either. It's also generally nicer the simpler an IR is. I also think that it's worth having to write a separate disassembly pass, but I can also anticipate disagreement there...
The following covers most of the issues with adapting the SPIR-V backend for SM 6.
**Types:**
SM 6 adds half, int16 and int64 support. The DXIL code uses the required width, and emits any necessary casts, so a dedicated backend compiles these correctly without any special handling except feature requirements. The existing backend must handle them explicitly. This will occur in more places than the current support for double because it includes integer types.
Support for integer width casts SEXT, ZEXT and TRUNC is required, plus DXIL sometimes uses BITCAST too.
DXIL types have no signedness, which causes problems when a sampled type is signed int. Bitcasts need to be added to convert signed SPIR-V types to unsigned after sampling/reading and back to signed for image writes.
**SSA:**
An SSA register type will remove the need to emit large numbers of unnecessary Load/Store instructions.
**Instructions:**
ALLOCA:
I think this must be done using DCL_TEMPS.
ATOMICRMW:
Addressing seems to be wrong in the backend; raw/structured addressing won't work for DXIL because it uses an array index not a byte offset. I can't test TPF because fxc crashes on arrayed groupshared variables. If it proves impossible to check addressing for SM <= 5.x then just fix it for DXIL.
BINOP:
Bool is a 1-bit integer in DXIL which means binops are valid for them, as are some casts such as SEXT, ZEXT, SITOFP and UITOFP. Special handling is required - synthesise ops on registers of COMPONENT_BOOL?
BR:
Create new instruction.
ATOMIC_BINOP:
In DXIL it can support 64-bit on capable hardware.
BUFFER LOAD/STORE:
Must be able to support 16-bit ops when native 16-bit types are enabled.
CBUFFER LOAD:
Must be able to support loading a float scalar for when CBufferLoad is used instead of CBufferLoadLegacy.
CREATE HANDLE:
Translates to one of the DCL_RESOURCE/UAV/CONSTANT_BUFFER instructions.
LOAD/STORE_INPUT:
It's probably not worth trying to sort through the tangle of private variables and other complications in the existing backend. Private variables are not needed, nor are variables for I/O between phases. Create a new instruction, or have the existing handler detect SM 6 and pass control to a separate handler function.
SAMPLE/TEXTURE_LOAD (and any other instruction which uses coords/offsets):
Coordinates are specified as individual scalars of any origin, so they need to be assembled into a vector. A new instruction or register type may be a good idea, instead of e.g.: mov r0.x, coord1.x mov r0.y, coord2.x mov r0.z, coord3.x
EXTRACTVAL:
Probably just a mov instruction.
GEP:
In TPF this is the address part of an instruction which accesses an array. We may need new instructions, one to express the GEP and another the LOAD.
PHI:
Create an instruction for it.
SWITCH:
There are no separate CASE and ENDSWITCH instructions; instead the switch instruction contains all cases and the default. Merge info needs to be supplied by the structuriser. A new instruction is probably best.
Unary, binary, and trinary arithmetic ops like BFREV, SIN, EXP, FMAD, FRC, UMAX etc probably require no special handling.
LOAD/STORE_INPUT:
It's probably not worth trying to sort through the tangle of private variables and other complications in the existing backend. Private variables are not needed, nor are variables for I/O between phases. Create a new instruction, or have the existing handler detect SM 6 and pass control to a separate handler function.
Well, that or we change vkd3d_shader_instruction to look more like sm6 or spirv, and put the onus of conversion on the sm4 frontend (and sm1, eventually). Of course "create a new instruction for this" is probably a reasonable intermediate step, so we don't block sm6 on that refactoring.
I don't know what sm6 cares about, although I gather that from your statement the impedance mismatch between it and spirv is minimal to nonexistent. But I went and listed the things we need to deal with wrt varyings in sm4 -> spirv (and sm1 -> spirv eventually):
* Space/register -> id -> spirv id mapping? I guess? I count this mostly because the vazir layer doesn't actually need to know anything about register index/space, just id.
* sm4 has sysvals and builtin registers, which are two different ways of doing the same thing I guess. Also for hull shaders the sysval is sometimes in the signature instead of the dcl instruction? sm1 has a similar thing. So vazir should have that difference abstracted away.
* Things which cause us to need a private i/o variable:
- Multiple semantics packed into the same v# register. (Only if one of them is a builtin, though. For the others we just calmly treat them as the same register. E.g. we only declare one spirv variable. Note that sm4 won't actually pack together registers with a different interpolation mode [at least the HLSL compiler won't], so this works. Not that we validate it...)
- Impedance mismatches between spirv builtins and sm4 builtins/sysvals (well, that's most of this, but the ones that fall under fixup_pfn):
* Vertex and instance ID need the base vertex/instance subtracted.
* Front face is bool in spirv but int in sm4.
* Frag coord W is inverted.
* sm1 is going to have more of these. Along the same lines it also has an FFP builtin, specifically, fog frag coord, although that ends up mapping to an extra varying. (My intuition is that vazir should not be aware of this as such, but that it should be handled up front in the sm1 frontend.)
- Registers which are arrayed in spirv but might not be arrayed in sm4.
- Hull shader vicp/vpc. Also hull shader outputs. I don't understand this and the code is not exactly easy to read.
- Output swizzle, which requires the use of a private variable. This is currently encoded into vkd3d_shader_spirv_target_info, but it could be encoded into other outputs or made more general.
- I think compiler->output_info is basically just the same thing (it tracks the real outputs, not the private ones), except that we need to store it ahead of time in spirv_compiler_emit_output() [instead of spirv_compiler_emit_input() where we can write it immediately]. In theory we could use a separate instruction stream here instead?
Note that we handle clip and cull distances (from the output signature) at creation time instead of when declaring the outputs. I dunno why.
* The writemask in the dcl instruction might be a proper subset of the one from the signature element (this is actually easy to reproduce in HLSL by only using part of the semantic). We declare spirv variables with the writemask from the signature element. Why? Judging from 2bdc63d68 I guess it's because we can't declare a non-contiguous spirv variable. My guess is that we'd want to declare the whole variable in vazir, i.e. use the signature writemask. (sm1 will take scanning and then probably filling in the whole register, that or we just declare everything as vec4s.)
Register packing is probably the most interesting open question here, I think. My tentative proposal is that we want to match spirv, which would mean synthesizing new private I/O variables on the dxbc side. Essentially the vkd3d_shader_instruction would get synthesized variable IDs instead of directly passing v#/o# from the sm1/4 side, and the sm4 frontend would be responsible for translating those.
Not sure about the other reasons for private i/o variables, though (i.e. the impedance mismatches). I guess we have options there. I'm kind of surprised though; does sm6 really match spirv and not sm4 for all of those? Also output swizzle is going to be a concern regardless, unless we handle that before even translating to vkd3d_shader_instruction IR.
On Mon Nov 7 06:25:27 2022 +0000, Zebediah Figura wrote:
LOAD/STORE_INPUT:
It's probably not worth trying to sort through the tangle of private
variables and other complications in the existing backend. Private variables are not needed, nor are variables for I/O between phases. Create a new instruction, or have the existing handler detect SM 6 and pass control to a separate handler function. Well, that or we change vkd3d_shader_instruction to look more like sm6 or spirv, and put the onus of conversion on the sm4 frontend (and sm1, eventually). Of course "create a new instruction for this" is probably a reasonable intermediate step, so we don't block sm6 on that refactoring. I don't know what sm6 cares about, although I gather that from your statement the impedance mismatch between it and spirv is minimal to nonexistent. But I went and listed the things we need to deal with wrt varyings in sm4 -> spirv (and sm1 -> spirv eventually):
- Space/register -> id -> spirv id mapping? I guess? I count this mostly because the vazir layer doesn't actually need to know anything about register index/space, just id.
- sm4 has sysvals and builtin registers, which are two different ways of doing the same thing I guess. Also for hull shaders the sysval is sometimes
in the signature instead of the dcl instruction? sm1 has a similar thing. So vazir should have that difference abstracted away.
- Things which cause us to need a private i/o variable:
- Multiple semantics packed into the same v# register. (Only if one of
them is a builtin, though. For the others we just calmly treat them as the same register. E.g. we only declare one spirv variable. Note that sm4 won't actually pack together registers with a different interpolation mode [at least the HLSL compiler won't], so this works. Not that we validate it...)
- Impedance mismatches between spirv builtins and sm4 builtins/sysvals (well, that's most of this, but the ones that fall under fixup_pfn):
- Vertex and instance ID need the base vertex/instance subtracted.
- Front face is bool in spirv but int in sm4.
- Frag coord W is inverted.
- sm1 is going to have more of these. Along the same lines it also
has an FFP builtin, specifically, fog frag coord, although that ends up mapping to an extra varying. (My intuition is that vazir should not be aware of this as such, but that it should be handled up front in the sm1 frontend.)
- Registers which are arrayed in spirv but might not be arrayed in sm4.
- Hull shader vicp/vpc. Also hull shader outputs. I don't understand
this and the code is not exactly easy to read.
- Output swizzle, which requires the use of a private variable. This is currently encoded into vkd3d_shader_spirv_target_info, but it could be encoded into other outputs or made more general.
- I think compiler->output_info is basically just the same thing (it tracks the real outputs, not the private ones), except that we need to
store it ahead of time in spirv_compiler_emit_output() [instead of spirv_compiler_emit_input() where we can write it immediately]. In theory we could use a separate instruction stream here instead? Note that we handle clip and cull distances (from the output signature) at creation time instead of when declaring the outputs. I dunno why.
- The writemask in the dcl instruction might be a proper subset of the
one from the signature element (this is actually easy to reproduce in HLSL by only using part of the semantic). We declare spirv variables with the writemask from the signature element. Why? Judging from 2bdc63d68 I guess it's because we can't declare a non-contiguous spirv variable. My guess is that we'd want to declare the whole variable in vazir, i.e. use the signature writemask. (sm1 will take scanning and then probably filling in the whole register, that or we just declare everything as vec4s.) Register packing is probably the most interesting open question here, I think. My tentative proposal is that we want to match spirv, which would mean synthesizing new private I/O variables on the dxbc side. Essentially the vkd3d_shader_instruction would get synthesized variable IDs instead of directly passing v#/o# from the sm1/4 side, and the sm4 frontend would be responsible for translating those. Not sure about the other reasons for private i/o variables, though (i.e. the impedance mismatches). I guess we have options there. I'm kind of surprised though; does sm6 really match spirv and not sm4 for all of those? Also output swizzle is going to be a concern regardless, unless we handle that before even translating to vkd3d_shader_instruction IR.
I wouldn't say the impedance mismatch is minimal, but it's much less than for TPF->SPIR-V.
Resource register space/idx are contained in the metadata, and there's a createHandle instruction which is a subset of TPF's DCL_RESOURCE instructions and which results in creation of an id. DCL_RESOURCE must be synthesised with all of the info.
Builtins still have mismatches, but because DXIL uses scalars it's trivial to read/write the relevant scalar within the SPIR-V vector. Fixups are no trouble either.
DXIL doesn't define anything like a shader phase, and I don't know why they exist in TPF. Tessellation I/O is straightforward.
If we use vkd3d_shader_instruction, the simplest way to get traces is to use the existing backend. Emitting DXIL-specific traces either requires another IR for that purpose, or passing the raw DXIL records to trace.c. Neither of these is particularly desirable, so we could just rely on dxc when a DXIL trace is needed.
VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER creates another issue. It uses vectors, and TPF may rearrange the data for better access. I wouldn't try to replicate that. Support for 16-bit types is required. The simplest approach is a new register type containing an array of scalar constants. This makes addressing easy. We also need more types in enum vkd3d_data_type:
VKD3D_DATA_HALF, VKD3D_DATA_UINT8, VKD3D_DATA_UINT16, VKD3D_DATA_UINT64
UINT8 is often used in dx.op instructions to encode e.g. resource types, so it's not exclusive to int8 support.
I've pushed a [branch](https://gitlab.winehq.org/cmccarthy/vkd3d/-/tree/sm6_common_backend) which can use the existing SPIR-V backend to compile a pixel shader which performs a binary op on SV_Position and stores to SV_Target. It doesn't include any workarounds to eliminate private variables. Most likely some parts of it are not required as I haven't been thorough about minimising it. It doesn't load the metadata, which means the input interpolation mode is not available.
I think it will have somewhat slower performance vs a separate backend, but it's hard to say by how much at this stage.
The SSA register type needs to be marked as a scalar or vector because use of the symbol tree and get_register_info() is avoided for simplicity and performance. Immediate constants already have enum vkd3d_immconst_type, which is the most logical place for the info if it's renamed appropriately.
Does enum vkd3d_static_type and VKD3D_STATIC_SCALAR, etc., look right?
On Wed Nov 16 19:20:40 2022 +0000, Conor McCarthy wrote:
The SSA register type needs to be marked as a scalar or vector because use of the symbol tree and get_register_info() is avoided for simplicity and performance. Immediate constants already have enum vkd3d_immconst_type, which is the most logical place for the info if it's renamed appropriately. Does enum vkd3d_static_type and VKD3D_STATIC_SCALAR, etc., look right?
I think that makes sense, at least. The native HLSL compiler actually emits that field on all parameters, it's just that only immconsts actually care (since we can infer it in every other case from the register type).
FWIW, elsewhere in vkd3d-shader we call that "dimension".
On Wed Nov 16 19:20:40 2022 +0000, Zebediah Figura wrote:
I think that makes sense, at least. The native HLSL compiler actually emits that field on all parameters, it's just that only immconsts actually care (since we can infer it in every other case from the register type). FWIW, elsewhere in vkd3d-shader we call that "dimension".
Btw, I'm also using it to flag undefined constants, i.e. VKD3D_IMMCONST_UNDEF_SCALAR, to avoid adding a field which would need to be initialised everywhere.
Signatures are complicated by the ids not always matching in the DXBC signature and the DXIL metadata version of it. This is because elements with the same semantic but different registers are collapsed in DXIL into a single declaration with a row count to show how many registers it occupies. This can be accommodated by adding a register_count field to struct vkd3d_shader_signature_element, but element ids in DXIL load/store instructions would need to be messily translated to the IR ones. I think it makes sense instead to use separate input/output declaration functions for SM 6. This bypasses the id problem and also means no unnecessary private variables.
Another complication: the result of DXIL comparison ops is a 1-bit integer, but TPF requires an integer 0 or 0xffffffff. The SPIR-V backend emits this conversion and it will break any subsequent DXIL instructions which assume a 1-bit int result. Maybe the simplest workaround is to omit the bool -> 0/~0 conversion for SM 6 and store a bool result. It will likely result in mods to instructions which have bool inputs though.
On Fri Nov 25 06:51:18 2022 +0000, Conor McCarthy wrote:
Another complication: the result of DXIL comparison ops is a 1-bit integer, but TPF requires an integer 0 or 0xffffffff. The SPIR-V backend emits this conversion and it will break any subsequent DXIL instructions which assume a 1-bit int result. Maybe the simplest workaround is to omit the bool -> 0/~0 conversion for SM 6 and store a bool result. It will likely result in mods to instructions which have bool inputs though.
Could we add VKD3D_DATA_BOOL instead?
On Fri Nov 25 06:51:18 2022 +0000, Zebediah Figura wrote:
Could we add VKD3D_DATA_BOOL instead?
Already done. It will emit a register with type VKD3D_DATA_BOOL, and if conversions are needed anywhere I'll deal with those as they arise.
On Mon Nov 7 06:25:27 2022 +0000, Conor McCarthy wrote:
I wouldn't say the impedance mismatch is minimal, but it's much less than for TPF->SPIR-V. Resource register space/idx are contained in the metadata, and there's a createHandle instruction which is a subset of TPF's DCL_RESOURCE instructions and which results in creation of an id. DCL_RESOURCE must be synthesised with all of the info. Builtins still have mismatches, but because DXIL uses scalars it's trivial to read/write the relevant scalar within the SPIR-V vector. Fixups are no trouble either. DXIL doesn't define anything like a shader phase, and I don't know why they exist in TPF. Tessellation I/O is straightforward.
I have a complete picture of this after dealing with converting DXIL to our existing IR. Introducing new backend functions for SM 6 input/output and eventually fixing up TPF in the frontend to use the new functions is the best solution I think.
- Multiple semantics packed into the same v# register: DXIL refers to inputs/outputs by signature element index rather than register index. The equivalent of a register index in TPF is now a row index, and component index is a column index. Using the element index as a register index in the IR works well, and results in separate declarations for separate elements within the same row. This eliminates the need for a private variable. TPF can be fixed up in the frontend to match this since it expresses the same information but in a messier way. - Registers which are arrayed in spirv but might not be arrayed in sm4: Where a builtin is arrayed in spir-v it is also an array in DXIL, i.e. the declaration has a start row and row count, with a single component on each row. TPF can be converted to this format by merging multiple signature elements. The only backend complication is emitting a zero index when loading from an array of size 1, because DXIL treats it as a scalar in this case but the spir-v variable is still an array. - Hull shader vicp/vocp: These are implicit declarations with no sysval. They are handled pretty simply for SM 6 by emitting an input or output declaration to the IR. No private variable is needed in the backend. The TPF frontend can be fixed up to do this too. - Impedance mismatches between spirv builtins and sm4 builtins/sysvals: It's not complicated to store a fixup_pfn in register_info and apply the fixup on loading. For scalar loads the component index must be passed to the fixup so frag coord w can be handled. For cases where the value is loaded more than once it may be worth caching the result, but in branched shaders the code would have to go in the entry block. - Output swizzle: probably still a private variable.