From: Henri Verbeet hverbeet@codeweavers.com
--- libs/vkd3d-shader/dxbc.c | 1748 +--------------------- libs/vkd3d-shader/hlsl_sm4.c | 1660 +++++++++++++++++++- libs/vkd3d-shader/vkd3d_shader_private.h | 2 + 3 files changed, 1706 insertions(+), 1704 deletions(-)
diff --git a/libs/vkd3d-shader/dxbc.c b/libs/vkd3d-shader/dxbc.c index 62a9a702..e5bc67d9 100644 --- a/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d-shader/dxbc.c @@ -19,1673 +19,81 @@ */
#include "vkd3d_shader_private.h" -#include "sm4.h"
-#define SM4_MAX_SRC_COUNT 6 -#define SM4_MAX_DST_COUNT 2 - -STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); - -void dxbc_writer_init(struct dxbc_writer *dxbc) -{ - memset(dxbc, 0, sizeof(*dxbc)); -} - -void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) -{ - struct vkd3d_shader_dxbc_section_desc *section; - - assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); - - section = &dxbc->sections[dxbc->section_count++]; - section->tag = tag; - section->data.code = data; - section->data.size = size; -} - -int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, - struct vkd3d_shader_code *dxbc, char **messages) -{ - size_t size_position, offsets_position, checksum_position, i; - struct vkd3d_bytecode_buffer buffer = {0}; - uint32_t checksum[4]; - - TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); - - if (messages) - *messages = NULL; - - put_u32(&buffer, TAG_DXBC); - - checksum_position = bytecode_get_size(&buffer); - for (i = 0; i < 4; ++i) - put_u32(&buffer, 0); - - put_u32(&buffer, 1); /* version */ - size_position = put_u32(&buffer, 0); - put_u32(&buffer, section_count); - - offsets_position = bytecode_get_size(&buffer); - for (i = 0; i < section_count; ++i) - put_u32(&buffer, 0); - - for (i = 0; i < section_count; ++i) - { - set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_align(&buffer)); - put_u32(&buffer, sections[i].tag); - put_u32(&buffer, sections[i].data.size); - bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); - } - set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - - vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); - for (i = 0; i < 4; ++i) - set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); - - if (!buffer.status) - { - dxbc->code = buffer.data; - dxbc->size = buffer.size; - } - return buffer.status; -} - -int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) -{ - return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); -} - -struct vkd3d_shader_src_param_entry -{ - struct list entry; - struct vkd3d_shader_src_param param; -}; - -struct vkd3d_shader_sm4_parser -{ - const uint32_t *start, *end, *ptr; - - unsigned int output_map[MAX_REG_OUTPUT]; - - struct vkd3d_shader_parser p; -}; - -struct vkd3d_sm4_opcode_info -{ - enum vkd3d_sm4_opcode opcode; - enum vkd3d_shader_opcode handler_idx; - char dst_info[SM4_MAX_DST_COUNT]; - char src_info[SM4_MAX_SRC_COUNT]; - void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); -}; - -static const enum vkd3d_primitive_type output_primitive_type_table[] = -{ - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, -}; - -static const enum vkd3d_primitive_type input_primitive_type_table[] = -{ - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, - /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, - /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, - /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, -}; - -static const enum vkd3d_shader_resource_type resource_type_table[] = -{ - /* 0 */ VKD3D_SHADER_RESOURCE_NONE, - /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, - /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, - /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, - /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, - /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, - /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, - /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, - /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -}; - -static const enum vkd3d_data_type data_type_table[] = -{ - /* 0 */ VKD3D_DATA_FLOAT, - /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, - /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, - /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, - /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, - /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, - /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, - /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, - /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, - /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, -}; - -static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) -{ - return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); -} - -static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) -{ - const struct vkd3d_shader_version *version = &sm4->p.shader_version; - - return version->major >= 5 && version->minor >= 1; -} - -static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); -static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); - -static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, - const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) -{ - *register_space = 0; - - if (!shader_is_sm_5_1(priv)) - return true; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - - *register_space = *(*ptr)++; - return true; -} - -static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, - (struct vkd3d_shader_src_param *)&ins->src[0]); - ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? - VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; -} - -static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_immediate_constant_buffer *icb; - enum vkd3d_sm4_shader_data_type type; - unsigned int icb_size; - - type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; - if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) - { - FIXME("Ignoring shader data type %#x.\n", type); - ins->handler_idx = VKD3DSIH_NOP; - return; - } - - ++tokens; - icb_size = token_count - 1; - if (icb_size % 4) - { - FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - - if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) - { - ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); - vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - icb->vec4_count = icb_size / 4; - memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); - shader_instruction_array_add_icb(&priv->p.instructions, icb); - ins->declaration.icb = icb; -} - -static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) -{ - range->first = reg->idx[1].offset; - range->last = reg->idx[shader_is_sm_5_1(sm4) ? 2 : 1].offset; - if (range->last < range->first) - { - FIXME("Invalid register range [%u:%u].\n", range->first, range->last); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, - "Last register %u must not be less than first register %u in range.\n", range->last, range->first); - } -} - -static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; - enum vkd3d_sm4_resource_type resource_type; - const uint32_t *end = &tokens[token_count]; - enum vkd3d_sm4_data_type data_type; - enum vkd3d_data_type reg_data_type; - DWORD components; - unsigned int i; - - resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; - if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) - { - FIXME("Unhandled resource type %#x.\n", resource_type); - semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; - } - else - { - semantic->resource_type = resource_type_table[resource_type]; - } - - if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS - || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) - { - semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - - reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; - shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); - - components = *tokens++; - for (i = 0; i < VKD3D_VEC4_SIZE; i++) - { - data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); - - if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) - { - FIXME("Unhandled data type %#x.\n", data_type); - semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; - } - else - { - semantic->resource_data_type[i] = data_type_table[data_type]; - } - } - - if (reg_data_type == VKD3D_DATA_UAV) - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - - shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); -} - -static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); - if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) - ins->flags |= VKD3DSI_INDEXED_DYNAMIC; - - ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; - ins->declaration.cb.range.space = 0; - - if (shader_is_sm_5_1(priv)) - { - if (tokens >= end) - { - FIXME("Invalid ptr %p >= end %p.\n", tokens, end); - return; - } - - ins->declaration.cb.size = *tokens++; - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); - } -} - -static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - const uint32_t *end = &tokens[token_count]; - - ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; - if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) - FIXME("Unhandled sampler mode %#x.\n", ins->flags); - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); -} - -static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, - &ins->declaration.index_range.dst); - ins->declaration.index_range.register_count = *tokens; -} - -static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - enum vkd3d_sm4_output_primitive_type primitive_type; - - primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) - ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; - else - ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled output primitive type %#x.\n", primitive_type); -} - -static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - enum vkd3d_sm4_input_primitive_type primitive_type; - - primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) - { - ins->declaration.primitive_type.type = VKD3D_PT_PATCH; - ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; - } - else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) - { - ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; - } - else - { - ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; - } - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled input primitive type %#x.\n", primitive_type); -} - -static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.count = *tokens; -} - -static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -} - -static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.register_semantic.reg); - ins->declaration.register_semantic.sysval_semantic = *tokens; -} - -static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -} - -static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.register_semantic.reg); - ins->declaration.register_semantic.sysval_semantic = *tokens; -} - -static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.indexable_temp.register_idx = *tokens++; - ins->declaration.indexable_temp.register_size = *tokens++; - ins->declaration.indexable_temp.component_count = *tokens; -} - -static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; -} - -static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; - src_params[0].reg.u.fp_body_idx = *tokens++; - shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); -} - -static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.index = *tokens; -} - -static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.index = *tokens++; - FIXME("Ignoring set of function bodies (count %u).\n", *tokens); -} - -static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.fp.index = *tokens++; - ins->declaration.fp.body_count = *tokens++; - ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; - ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; - FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); -} - -static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) - >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.max_tessellation_factor = *(float *)tokens; -} - -static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.thread_group_size.x = *tokens++; - ins->declaration.thread_group_size.y = *tokens++; - ins->declaration.thread_group_size.z = *tokens++; -} - -static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); - ins->declaration.tgsm_raw.byte_count = *tokens; - if (ins->declaration.tgsm_raw.byte_count % 4) - FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); -} - -static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.tgsm_structured.reg); - ins->declaration.tgsm_structured.byte_stride = *tokens++; - ins->declaration.tgsm_structured.structure_count = *tokens; - if (ins->declaration.tgsm_structured.byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); -} - -static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; -} - -/* - * d -> VKD3D_DATA_DOUBLE - * f -> VKD3D_DATA_FLOAT - * i -> VKD3D_DATA_INT - * u -> VKD3D_DATA_UINT - * O -> VKD3D_DATA_OPAQUE - * R -> VKD3D_DATA_RESOURCE - * S -> VKD3D_DATA_SAMPLER - * U -> VKD3D_DATA_UAV - */ -static const struct vkd3d_sm4_opcode_info opcode_table[] = -{ - {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, - {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, - {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, - {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, - {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, - {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, - {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, - {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, - {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, - {VKD3D_SM4_OP_DISCARD, VKD3DSIH_TEXKILL, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, - {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, - {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, - {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, - {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, - {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, - {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, - {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, - {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, - {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, - {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, - {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, - {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, - {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, - {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, - {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, - {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, - {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, - {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, - {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, - {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, - {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, - {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, - {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, - {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, - {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, - {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, - {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, - {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, - {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, - {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, - {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, - {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, - {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, - {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, - {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, - {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, - {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", - shader_sm4_read_shader_data}, - {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, - {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, - {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, - {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, - {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, - {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, - {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, - {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, - {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, - {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, - {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, - {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, - {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, - {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, - {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, - {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, - {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, - {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, - {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, - {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, - {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, - {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, - {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, - {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, - {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, - {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, - {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", - shader_sm4_read_dcl_resource}, - {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", - shader_sm4_read_dcl_constant_buffer}, - {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", - shader_sm4_read_dcl_sampler}, - {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", - shader_sm4_read_dcl_index_range}, - {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", - shader_sm4_read_dcl_output_topology}, - {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", - shader_sm4_read_dcl_input_primitive}, - {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", - shader_sm4_read_declaration_dst}, - {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", - shader_sm4_read_dcl_input_ps}, - {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", - shader_sm4_read_dcl_input_ps_siv}, - {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", - shader_sm4_read_declaration_dst}, - {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", - shader_sm4_read_dcl_indexable_temp}, - {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", - shader_sm4_read_dcl_global_flags}, - {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, - {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, - {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, - {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, - {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, - {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, - {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, - {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", - shader_sm5_read_fcall}, - {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, - {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, - {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, - {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, - {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, - {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, - {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, - {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, - {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, - {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, - {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, - {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, - {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, - {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", - shader_sm5_read_dcl_function_body}, - {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", - shader_sm5_read_dcl_function_table}, - {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", - shader_sm5_read_dcl_interface}, - {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", - shader_sm5_read_control_point_count}, - {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", - shader_sm5_read_control_point_count}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", - shader_sm5_read_dcl_tessellator_domain}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", - shader_sm5_read_dcl_tessellator_partitioning}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", - shader_sm5_read_dcl_tessellator_output_primitive}, - {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", - shader_sm5_read_dcl_hs_max_tessfactor}, - {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", - shader_sm5_read_dcl_thread_group}, - {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", - shader_sm4_read_dcl_resource}, - {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", - shader_sm5_read_dcl_uav_raw}, - {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", - shader_sm5_read_dcl_uav_structured}, - {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", - shader_sm5_read_dcl_tgsm_raw}, - {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", - shader_sm5_read_dcl_tgsm_structured}, - {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", - shader_sm5_read_dcl_resource_raw}, - {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", - shader_sm5_read_dcl_resource_structured}, - {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, - {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, - {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, - {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, - {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, - {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, - {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, - {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, - {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", - shader_sm5_read_sync}, - {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, - {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, - {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, - {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, - {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, - {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, - {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, - {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, - {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, - {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, - {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, - {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, - {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, - {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, - {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, - {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, - {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, - {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, - {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, - {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, - {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, - {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, - {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, - {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, - {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, - {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, - {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, - {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, - {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, - {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, - {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, -}; - -static const enum vkd3d_shader_register_type register_type_table[] = -{ - /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, - /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, - /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, - /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, - /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, - /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, - /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, - /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, - /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, - /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, - /* UNKNOWN */ ~0u, - /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, - /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, - /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, - /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, - /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, - /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, - /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, - /* UNKNOWN */ ~0u, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, - /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, - /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, - /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, - /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, - /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, - /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, - /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, - /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, - /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, - /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, - /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, - /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, - /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, - /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, - /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, -}; - -static const enum vkd3d_shader_register_precision register_precision_table[] = -{ - /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, - /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, -}; - -static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) -{ - unsigned int i; - - for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) - { - if (opcode == opcode_table[i].opcode) return &opcode_table[i]; - } - - return NULL; -} - -static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) -{ - switch (sm4->p.shader_version.type) - { - case VKD3D_SHADER_TYPE_PIXEL: - if (reg->type == VKD3DSPR_OUTPUT) - { - unsigned int reg_idx = reg->idx[0].offset; - - if (reg_idx >= ARRAY_SIZE(sm4->output_map)) - { - ERR("Invalid output index %u.\n", reg_idx); - break; - } - - reg->type = VKD3DSPR_COLOROUT; - reg->idx[0].offset = sm4->output_map[reg_idx]; - } - break; - - default: - break; - } -} - -static enum vkd3d_data_type map_data_type(char t) -{ - switch (t) - { - case 'd': - return VKD3D_DATA_DOUBLE; - case 'f': - return VKD3D_DATA_FLOAT; - case 'i': - return VKD3D_DATA_INT; - case 'u': - return VKD3D_DATA_UINT; - case 'O': - return VKD3D_DATA_OPAQUE; - case 'R': - return VKD3D_DATA_RESOURCE; - case 'S': - return VKD3D_DATA_SAMPLER; - case 'U': - return VKD3D_DATA_UAV; - default: - ERR("Invalid data type '%c'.\n", t); - return VKD3D_DATA_FLOAT; - } -} - -static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - shader_instruction_array_destroy(&parser->instructions); - free_shader_desc(&parser->shader_desc); - vkd3d_free(sm4); -} - -static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) -{ - if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) - { - struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); - - if (!(reg_idx->rel_addr = rel_addr)) - { - ERR("Failed to get src param for relative addressing.\n"); - return false; - } - - if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) - reg_idx->offset = *(*ptr)++; - else - reg_idx->offset = 0; - shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); - } - else - { - reg_idx->rel_addr = NULL; - reg_idx->offset = *(*ptr)++; - } - - return true; -} - -static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) -{ - switch (register_type) - { - case VKD3D_SM4_RT_SAMPLER: - case VKD3D_SM4_RT_RESOURCE: - case VKD3D_SM4_RT_CONSTBUFFER: - case VKD3D_SM5_RT_UAV: - return true; - - default: - return false; - } -} - -static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, - enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) -{ - enum vkd3d_sm4_register_precision precision; - enum vkd3d_sm4_register_type register_type; - enum vkd3d_sm4_extended_operand_type type; - enum vkd3d_sm4_register_modifier m; - uint32_t token, order, extended; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = *(*ptr)++; - - register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; - if (register_type >= ARRAY_SIZE(register_type_table) - || register_type_table[register_type] == VKD3DSPR_INVALID) - { - FIXME("Unhandled register type %#x.\n", register_type); - param->type = VKD3DSPR_TEMP; - } - else - { - param->type = register_type_table[register_type]; - } - param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - param->non_uniform = false; - param->data_type = data_type; - - *modifier = VKD3DSPSM_NONE; - if (token & VKD3D_SM4_EXTENDED_OPERAND) - { - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - extended = *(*ptr)++; - - if (extended & VKD3D_SM4_EXTENDED_OPERAND) - { - FIXME("Skipping second-order extended operand.\n"); - *ptr += *ptr < end; - } - - type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; - if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) - { - m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; - switch (m) - { - case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: - *modifier = VKD3DSPSM_NEG; - break; - - case VKD3D_SM4_REGISTER_MODIFIER_ABS: - *modifier = VKD3DSPSM_ABS; - break; - - case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: - *modifier = VKD3DSPSM_ABSNEG; - break; - - default: - FIXME("Unhandled register modifier %#x.\n", m); - /* fall-through */ - case VKD3D_SM4_REGISTER_MODIFIER_NONE: - break; - } - - precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; - if (precision >= ARRAY_SIZE(register_precision_table) - || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) - { - FIXME("Unhandled register precision %#x.\n", precision); - param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; - } - else - { - param->precision = register_precision_table[precision]; - } - - if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) - param->non_uniform = true; - - extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK - | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK - | VKD3D_SM4_EXTENDED_OPERAND); - if (extended) - FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); - } - else if (type) - { - FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); - } - } - - order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; - - if (order < 1) - { - param->idx[0].offset = ~0u; - param->idx[0].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[0]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order < 2) - { - param->idx[1].offset = ~0u; - param->idx[1].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[1]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order < 3) - { - param->idx[2].offset = ~0u; - param->idx[2].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[2]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order > 3) - { - WARN("Unhandled order %u.\n", order); - return false; - } - - if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) - { - enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; - unsigned int dword_count; - - switch (dimension) - { - case VKD3D_SM4_DIMENSION_SCALAR: - param->immconst_type = VKD3D_IMMCONST_SCALAR; - dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); - if (end - *ptr < dword_count) - { - WARN("Invalid ptr %p, end %p.\n", *ptr, end); - return false; - } - memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); - *ptr += dword_count; - break; - - case VKD3D_SM4_DIMENSION_VEC4: - param->immconst_type = VKD3D_IMMCONST_VEC4; - if (end - *ptr < VKD3D_VEC4_SIZE) - { - WARN("Invalid ptr %p, end %p.\n", *ptr, end); - return false; - } - memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); - *ptr += 4; - break; - - default: - FIXME("Unhandled dimension %#x.\n", dimension); - break; - } - } - else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) - { - /* SM5.1 places a symbol identifier in idx[0] and moves - * other values up one slot. Normalize to SM5.1. */ - param->idx[2] = param->idx[1]; - param->idx[1] = param->idx[0]; - } - - map_register(priv, param); - - return true; -} - -static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) -{ - switch (reg->type) - { - case VKD3DSPR_COVERAGE: - case VKD3DSPR_DEPTHOUT: - case VKD3DSPR_DEPTHOUTGE: - case VKD3DSPR_DEPTHOUTLE: - case VKD3DSPR_GSINSTID: - case VKD3DSPR_LOCALTHREADINDEX: - case VKD3DSPR_OUTPOINTID: - case VKD3DSPR_PRIMID: - case VKD3DSPR_SAMPLEMASK: - case VKD3DSPR_OUTSTENCILREF: - return true; - default: - return false; - } -} - -static uint32_t swizzle_from_sm4(uint32_t s) -{ - return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); -} - -static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) +void dxbc_writer_init(struct dxbc_writer *dxbc) { - DWORD token; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = **ptr; - - if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) - { - ERR("Failed to read parameter.\n"); - return false; - } - - if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) - { - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - } - else - { - enum vkd3d_sm4_swizzle_type swizzle_type = - (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - - switch (swizzle_type) - { - case VKD3D_SM4_SWIZZLE_NONE: - if (shader_sm4_is_scalar_register(&src_param->reg)) - src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - else - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - break; - - case VKD3D_SM4_SWIZZLE_SCALAR: - src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; - src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; - break; - - case VKD3D_SM4_SWIZZLE_VEC4: - src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); - break; - - default: - FIXME("Unhandled swizzle type %#x.\n", swizzle_type); - break; - } - } - - return true; + memset(dxbc, 0, sizeof(*dxbc)); }
-static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) +void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) { - enum vkd3d_shader_src_modifier modifier; - DWORD token; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = **ptr; - - if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) - { - ERR("Failed to read parameter.\n"); - return false; - } - - if (modifier != VKD3DSPSM_NONE) - { - ERR("Invalid source modifier %#x on destination register.\n", modifier); - return false; - } + struct vkd3d_shader_dxbc_section_desc *section;
- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; - if (data_type == VKD3D_DATA_DOUBLE) - dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); - /* Scalar registers are declared with no write mask in shader bytecode. */ - if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) - dst_param->write_mask = VKD3DSP_WRITEMASK_0; - dst_param->modifiers = 0; - dst_param->shift = 0; + assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections));
- return true; + section = &dxbc->sections[dxbc->section_count++]; + section->tag = tag; + section->data.code = data; + section->data.size = size; }
-static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) +int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, + struct vkd3d_shader_code *dxbc, char **messages) { - enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; - - switch (modifier_type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: - { - static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER - | VKD3D_SM4_MODIFIER_MASK - | VKD3D_SM4_AOFFIMMI_U_MASK - | VKD3D_SM4_AOFFIMMI_V_MASK - | VKD3D_SM4_AOFFIMMI_W_MASK; - - /* Bit fields are used for sign extension. */ - struct - { - int u : 4; - int v : 4; - int w : 4; - } aoffimmi; - - if (modifier & ~recognized_bits) - FIXME("Unhandled instruction modifier %#x.\n", modifier); - - aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; - aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; - aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; - ins->texel_offset.u = aoffimmi.u; - ins->texel_offset.v = aoffimmi.v; - ins->texel_offset.w = aoffimmi.w; - break; - } - - case VKD3D_SM5_MODIFIER_DATA_TYPE: - { - DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; - unsigned int i; - - for (i = 0; i < VKD3D_VEC4_SIZE; i++) - { - enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); - - if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) - { - FIXME("Unhandled data type %#x.\n", data_type); - ins->resource_data_type[i] = VKD3D_DATA_FLOAT; - } - else - { - ins->resource_data_type[i] = data_type_table[data_type]; - } - } - break; - } - - case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: - { - enum vkd3d_sm4_resource_type resource_type - = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; - - if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) - ins->raw = true; - else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) - ins->structured = true; - - if (resource_type < ARRAY_SIZE(resource_type_table)) - ins->resource_type = resource_type_table[resource_type]; - else - { - FIXME("Unhandled resource type %#x.\n", resource_type); - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; - } - - ins->resource_stride - = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; - break; - } + size_t size_position, offsets_position, checksum_position, i; + struct vkd3d_bytecode_buffer buffer = {0}; + uint32_t checksum[4];
- default: - FIXME("Unhandled instruction modifier %#x.\n", modifier); - } -} + TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages);
-static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) -{ - const struct vkd3d_sm4_opcode_info *opcode_info; - uint32_t opcode_token, opcode, previous_token; - struct vkd3d_shader_dst_param *dst_params; - struct vkd3d_shader_src_param *src_params; - const uint32_t **ptr = &sm4->ptr; - unsigned int i, len; - size_t remaining; - const uint32_t *p; - DWORD precise; + if (messages) + *messages = NULL;
- if (*ptr >= sm4->end) - { - WARN("End of byte-code, failed to read opcode.\n"); - goto fail; - } - remaining = sm4->end - *ptr; + put_u32(&buffer, TAG_DXBC);
- ++sm4->p.location.line; + checksum_position = bytecode_get_size(&buffer); + for (i = 0; i < 4; ++i) + put_u32(&buffer, 0);
- opcode_token = *(*ptr)++; - opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; + put_u32(&buffer, 1); /* version */ + size_position = put_u32(&buffer, 0); + put_u32(&buffer, section_count);
- len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); - if (!len) - { - if (remaining < 2) - { - WARN("End of byte-code, failed to read length token.\n"); - goto fail; - } - len = **ptr; - } - if (!len || remaining < len) - { - WARN("Read invalid length %u (remaining %zu).\n", len, remaining); - goto fail; - } - --len; + offsets_position = bytecode_get_size(&buffer); + for (i = 0; i < section_count; ++i) + put_u32(&buffer, 0);
- if (!(opcode_info = get_opcode_info(opcode))) + for (i = 0; i < section_count; ++i) { - FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); - ins->handler_idx = VKD3DSIH_INVALID; - *ptr += len; - return; - } - - ins->handler_idx = opcode_info->handler_idx; - ins->flags = 0; - ins->coissue = false; - ins->raw = false; - ins->structured = false; - ins->predicate = NULL; - ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); - ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); - ins->src = src_params = shader_parser_get_src_params(&sm4->p, ins->src_count); - if (!src_params && ins->src_count) - { - ERR("Failed to allocate src parameters.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; + set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_align(&buffer)); + put_u32(&buffer, sections[i].tag); + put_u32(&buffer, sections[i].data.size); + bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); } - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; - ins->resource_stride = 0; - ins->resource_data_type[0] = VKD3D_DATA_FLOAT; - ins->resource_data_type[1] = VKD3D_DATA_FLOAT; - ins->resource_data_type[2] = VKD3D_DATA_FLOAT; - ins->resource_data_type[3] = VKD3D_DATA_FLOAT; - memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + set_u32(&buffer, size_position, bytecode_get_size(&buffer));
- p = *ptr; - *ptr += len; + vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); + for (i = 0; i < 4; ++i) + set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]);
- if (opcode_info->read_opcode_func) - { - ins->dst = NULL; - ins->dst_count = 0; - opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); - } - else + if (!buffer.status) { - enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; - - previous_token = opcode_token; - while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) - shader_sm4_read_instruction_modifier(previous_token = *p++, ins); - - ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) - { - ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; - instruction_dst_modifier = VKD3DSPDM_SATURATE; - } - precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; - ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; - - ins->dst = dst_params = shader_parser_get_dst_params(&sm4->p, ins->dst_count); - if (!dst_params && ins->dst_count) - { - ERR("Failed to allocate dst parameters.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - for (i = 0; i < ins->dst_count; ++i) - { - if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), - &dst_params[i]))) - { - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - dst_params[i].modifiers |= instruction_dst_modifier; - } - - for (i = 0; i < ins->src_count; ++i) - { - if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), - &src_params[i]))) - { - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - } + dxbc->code = buffer.data; + dxbc->size = buffer.size; } - - return; - -fail: - *ptr = sm4->end; - ins->handler_idx = VKD3DSIH_INVALID; - return; + return buffer.status; }
-static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = +int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) { - .parser_destroy = shader_sm4_destroy, -}; + return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); +}
-static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, - size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, - struct vkd3d_shader_message_context *message_context) +struct vkd3d_shader_src_param_entry { - struct vkd3d_shader_version version; - uint32_t version_token, token_count; - unsigned int i; - - if (byte_code_size / sizeof(*byte_code) < 2) - { - WARN("Invalid byte code size %lu.\n", (long)byte_code_size); - return false; - } - - version_token = byte_code[0]; - TRACE("Version: 0x%08x.\n", version_token); - token_count = byte_code[1]; - TRACE("Token count: %u.\n", token_count); - - if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) - { - WARN("Invalid token count %u.\n", token_count); - return false; - } - - sm4->start = &byte_code[2]; - sm4->end = &byte_code[token_count]; - - switch (version_token >> 16) - { - case VKD3D_SM4_PS: - version.type = VKD3D_SHADER_TYPE_PIXEL; - break; - - case VKD3D_SM4_VS: - version.type = VKD3D_SHADER_TYPE_VERTEX; - break; - - case VKD3D_SM4_GS: - version.type = VKD3D_SHADER_TYPE_GEOMETRY; - break; - - case VKD3D_SM5_HS: - version.type = VKD3D_SHADER_TYPE_HULL; - break; - - case VKD3D_SM5_DS: - version.type = VKD3D_SHADER_TYPE_DOMAIN; - break; - - case VKD3D_SM5_CS: - version.type = VKD3D_SHADER_TYPE_COMPUTE; - break; - - default: - FIXME("Unrecognised shader type %#x.\n", version_token >> 16); - } - version.major = VKD3D_SM4_VERSION_MAJOR(version_token); - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, - token_count / 7u + 20)) - return false; - sm4->ptr = sm4->start; - - memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); - for (i = 0; i < output_signature->element_count; ++i) - { - struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; - - if (version.type == VKD3D_SHADER_TYPE_PIXEL - && ascii_strcasecmp(e->semantic_name, "SV_Target")) - continue; - if (e->register_index >= ARRAY_SIZE(sm4->output_map)) - { - WARN("Invalid output index %u.\n", e->register_index); - continue; - } - - sm4->output_map[e->register_index] = e->semantic_index; - } - - return true; -} + struct list entry; + struct vkd3d_shader_src_param param; +};
static bool require_space(size_t offset, size_t count, size_t size, size_t data_size) { @@ -2119,7 +527,7 @@ void free_shader_desc(struct vkd3d_shader_desc *desc) vkd3d_shader_free_shader_signature(&desc->patch_constant_signature); }
-static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) { int ret; @@ -2143,66 +551,6 @@ static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, return ret; }
-int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -{ - struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_desc *shader_desc; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm4_parser *sm4; - int ret; - - if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) - { - ERR("Failed to allocate parser.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - shader_desc = &sm4->p.shader_desc; - if ((ret = shader_extract_from_dxbc(&compile_info->source, - message_context, compile_info->source_name, shader_desc)) < 0) - { - WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm4); - return ret; - } - - if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, &shader_desc->output_signature, message_context)) - { - WARN("Failed to initialise shader parser.\n"); - free_shader_desc(shader_desc); - vkd3d_free(sm4); - return VKD3D_ERROR_INVALID_ARGUMENT; - } - - instructions = &sm4->p.instructions; - while (sm4->ptr != sm4->end) - { - if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { - ERR("Failed to allocate instructions.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - shader_sm4_destroy(&sm4->p); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ins = &instructions->elements[instructions->count]; - shader_sm4_read_instruction(sm4, ins); - - if (ins->handler_idx == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - shader_sm4_destroy(&sm4->p); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ++instructions->count; - } - - *parser = &sm4->p; - - return VKD3D_OK; -} - /* root signatures */ #define VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 47bc1fed..91aba46a 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1,6 +1,9 @@ /* - * HLSL code generation for DXBC shader models 4-5 + * TPF (Direct3D shader models 4 and 5 bytecode) support * + * Copyright 2008-2009 Henri Verbeet for CodeWeavers + * Copyright 2010 Rico Schüller + * Copyright 2017 Józef Kucia for CodeWeavers * Copyright 2019-2020 Zebediah Figura for CodeWeavers * * This library is free software; you can redistribute it and/or @@ -19,9 +22,1658 @@ */
#include "hlsl.h" -#include <stdio.h> -#include "vkd3d_d3dcommon.h" -#include "sm4.h" + +#define SM4_MAX_SRC_COUNT 6 +#define SM4_MAX_DST_COUNT 2 + +STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); + +struct vkd3d_shader_sm4_parser +{ + const uint32_t *start, *end, *ptr; + + unsigned int output_map[MAX_REG_OUTPUT]; + + struct vkd3d_shader_parser p; +}; + +struct vkd3d_sm4_opcode_info +{ + enum vkd3d_sm4_opcode opcode; + enum vkd3d_shader_opcode handler_idx; + char dst_info[SM4_MAX_DST_COUNT]; + char src_info[SM4_MAX_SRC_COUNT]; + void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); +}; + +static const enum vkd3d_primitive_type output_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, +}; + +static const enum vkd3d_primitive_type input_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, + /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, + /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, + /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, +}; + +static const enum vkd3d_shader_resource_type resource_type_table[] = +{ + /* 0 */ VKD3D_SHADER_RESOURCE_NONE, + /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, + /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, + /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, + /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, +}; + +static const enum vkd3d_data_type data_type_table[] = +{ + /* 0 */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, + /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, + /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, + /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, + /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, + /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, + /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, + /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, +}; + +static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); +} + +static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) +{ + const struct vkd3d_shader_version *version = &sm4->p.shader_version; + + return version->major >= 5 && version->minor >= 1; +} + +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); + +static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, + const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) +{ + *register_space = 0; + + if (!shader_is_sm_5_1(priv)) + return true; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + + *register_space = *(*ptr)++; + return true; +} + +static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, + (struct vkd3d_shader_src_param *)&ins->src[0]); + ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? + VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; +} + +static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_immediate_constant_buffer *icb; + enum vkd3d_sm4_shader_data_type type; + unsigned int icb_size; + + type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; + if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) + { + FIXME("Ignoring shader data type %#x.\n", type); + ins->handler_idx = VKD3DSIH_NOP; + return; + } + + ++tokens; + icb_size = token_count - 1; + if (icb_size % 4) + { + FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + + if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) + { + ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + icb->vec4_count = icb_size / 4; + memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); + shader_instruction_array_add_icb(&priv->p.instructions, icb); + ins->declaration.icb = icb; +} + +static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, + const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) +{ + range->first = reg->idx[1].offset; + range->last = reg->idx[shader_is_sm_5_1(sm4) ? 2 : 1].offset; + if (range->last < range->first) + { + FIXME("Invalid register range [%u:%u].\n", range->first, range->last); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, + "Last register %u must not be less than first register %u in range.\n", range->last, range->first); + } +} + +static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; + enum vkd3d_sm4_resource_type resource_type; + const uint32_t *end = &tokens[token_count]; + enum vkd3d_sm4_data_type data_type; + enum vkd3d_data_type reg_data_type; + DWORD components; + unsigned int i; + + resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; + if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) + { + FIXME("Unhandled resource type %#x.\n", resource_type); + semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + else + { + semantic->resource_type = resource_type_table[resource_type]; + } + + if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + { + semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) + >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; + shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); + + components = *tokens++; + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + semantic->resource_data_type[i] = data_type_table[data_type]; + } + } + + if (reg_data_type == VKD3D_DATA_UAV) + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + + shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); +} + +static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); + if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) + ins->flags |= VKD3DSI_INDEXED_DYNAMIC; + + ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; + ins->declaration.cb.range.space = 0; + + if (shader_is_sm_5_1(priv)) + { + if (tokens >= end) + { + FIXME("Invalid ptr %p >= end %p.\n", tokens, end); + return; + } + + ins->declaration.cb.size = *tokens++; + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); + } +} + +static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + const uint32_t *end = &tokens[token_count]; + + ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; + if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) + FIXME("Unhandled sampler mode %#x.\n", ins->flags); + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); +} + +static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, + &ins->declaration.index_range.dst); + ins->declaration.index_range.register_count = *tokens; +} + +static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_output_primitive_type primitive_type; + + primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) + ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; + else + ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled output primitive type %#x.\n", primitive_type); +} + +static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_input_primitive_type primitive_type; + + primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) + { + ins->declaration.primitive_type.type = VKD3D_PT_PATCH; + ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; + } + else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) + { + ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; + } + else + { + ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; + } + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled input primitive type %#x.\n", primitive_type); +} + +static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = *tokens; +} + +static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +} + +static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.register_semantic.reg); + ins->declaration.register_semantic.sysval_semantic = *tokens; +} + +static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +} + +static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.register_semantic.reg); + ins->declaration.register_semantic.sysval_semantic = *tokens; +} + +static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.indexable_temp.register_idx = *tokens++; + ins->declaration.indexable_temp.register_size = *tokens++; + ins->declaration.indexable_temp.component_count = *tokens; +} + +static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; +} + +static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; + src_params[0].reg.u.fp_body_idx = *tokens++; + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); +} + +static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.index = *tokens; +} + +static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.index = *tokens++; + FIXME("Ignoring set of function bodies (count %u).\n", *tokens); +} + +static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.fp.index = *tokens++; + ins->declaration.fp.body_count = *tokens++; + ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; + ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; + FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); +} + +static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) + >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.max_tessellation_factor = *(float *)tokens; +} + +static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.thread_group_size.x = *tokens++; + ins->declaration.thread_group_size.y = *tokens++; + ins->declaration.thread_group_size.z = *tokens++; +} + +static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + resource->byte_stride = *tokens++; + if (resource->byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); + ins->declaration.tgsm_raw.byte_count = *tokens; + if (ins->declaration.tgsm_raw.byte_count % 4) + FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); +} + +static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.tgsm_structured.reg); + ins->declaration.tgsm_structured.byte_stride = *tokens++; + ins->declaration.tgsm_structured.structure_count = *tokens; + if (ins->declaration.tgsm_structured.byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); +} + +static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + resource->byte_stride = *tokens++; + if (resource->byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; +} + +/* + * d -> VKD3D_DATA_DOUBLE + * f -> VKD3D_DATA_FLOAT + * i -> VKD3D_DATA_INT + * u -> VKD3D_DATA_UINT + * O -> VKD3D_DATA_OPAQUE + * R -> VKD3D_DATA_RESOURCE + * S -> VKD3D_DATA_SAMPLER + * U -> VKD3D_DATA_UAV + */ +static const struct vkd3d_sm4_opcode_info opcode_table[] = +{ + {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, + {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, + {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, + {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, + {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, + {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, + {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, + {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, + {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, + {VKD3D_SM4_OP_DISCARD, VKD3DSIH_TEXKILL, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, + {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, + {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, + {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, + {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, + {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, + {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, + {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, + {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, + {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, + {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, + {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, + {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, + {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, + {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, + {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, + {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, + {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, + {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, + {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, + {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, + {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, + {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, + {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, + {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, + {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, + {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, + {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, + {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, + {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, + {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, + {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, + {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, + {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, + {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, + {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, + {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, + {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", + shader_sm4_read_shader_data}, + {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, + {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, + {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, + {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, + {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, + {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, + {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, + {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, + {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, + {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, + {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, + {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, + {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, + {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, + {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, + {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, + {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, + {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, + {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, + {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, + {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, + {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, + {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, + {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, + {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, + {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, + {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", + shader_sm4_read_dcl_resource}, + {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", + shader_sm4_read_dcl_constant_buffer}, + {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", + shader_sm4_read_dcl_sampler}, + {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", + shader_sm4_read_dcl_index_range}, + {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", + shader_sm4_read_dcl_output_topology}, + {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", + shader_sm4_read_dcl_input_primitive}, + {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", + shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", + shader_sm4_read_dcl_input_ps}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", + shader_sm4_read_dcl_input_ps_siv}, + {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", + shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", + shader_sm4_read_dcl_indexable_temp}, + {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", + shader_sm4_read_dcl_global_flags}, + {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, + {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, + {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, + {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, + {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, + {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, + {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, + {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", + shader_sm5_read_fcall}, + {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, + {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, + {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, + {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, + {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, + {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, + {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, + {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, + {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, + {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, + {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, + {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, + {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, + {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", + shader_sm5_read_dcl_function_body}, + {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", + shader_sm5_read_dcl_function_table}, + {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", + shader_sm5_read_dcl_interface}, + {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", + shader_sm5_read_dcl_tessellator_domain}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", + shader_sm5_read_dcl_tessellator_partitioning}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", + shader_sm5_read_dcl_tessellator_output_primitive}, + {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", + shader_sm5_read_dcl_hs_max_tessfactor}, + {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", + shader_sm5_read_dcl_thread_group}, + {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", + shader_sm4_read_dcl_resource}, + {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", + shader_sm5_read_dcl_uav_raw}, + {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", + shader_sm5_read_dcl_uav_structured}, + {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", + shader_sm5_read_dcl_tgsm_raw}, + {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", + shader_sm5_read_dcl_tgsm_structured}, + {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", + shader_sm5_read_dcl_resource_raw}, + {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", + shader_sm5_read_dcl_resource_structured}, + {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, + {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, + {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, + {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, + {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, + {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, + {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, + {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, + {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", + shader_sm5_read_sync}, + {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, + {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, + {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, + {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, + {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, + {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, + {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, + {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, + {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, + {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, + {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, + {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, + {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, + {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, + {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, + {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, + {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, + {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, + {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, + {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, + {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, + {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, + {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, + {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, + {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, + {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, + {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, + {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, + {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, + {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, + {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, + {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, + {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, +}; + +static const enum vkd3d_shader_register_type register_type_table[] = +{ + /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, + /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, + /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, + /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, + /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, + /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, + /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, + /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, + /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, + /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, + /* UNKNOWN */ ~0u, + /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, + /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, + /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, + /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, + /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, + /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, + /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, + /* UNKNOWN */ ~0u, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, + /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, + /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, + /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, + /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, + /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, + /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, + /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, + /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, + /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, + /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, + /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, + /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, + /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, + /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, + /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, + /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, + /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, +}; + +static const enum vkd3d_shader_register_precision register_precision_table[] = +{ + /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, + /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, +}; + +static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) +{ + unsigned int i; + + for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) + { + if (opcode == opcode_table[i].opcode) return &opcode_table[i]; + } + + return NULL; +} + +static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) +{ + switch (sm4->p.shader_version.type) + { + case VKD3D_SHADER_TYPE_PIXEL: + if (reg->type == VKD3DSPR_OUTPUT) + { + unsigned int reg_idx = reg->idx[0].offset; + + if (reg_idx >= ARRAY_SIZE(sm4->output_map)) + { + ERR("Invalid output index %u.\n", reg_idx); + break; + } + + reg->type = VKD3DSPR_COLOROUT; + reg->idx[0].offset = sm4->output_map[reg_idx]; + } + break; + + default: + break; + } +} + +static enum vkd3d_data_type map_data_type(char t) +{ + switch (t) + { + case 'd': + return VKD3D_DATA_DOUBLE; + case 'f': + return VKD3D_DATA_FLOAT; + case 'i': + return VKD3D_DATA_INT; + case 'u': + return VKD3D_DATA_UINT; + case 'O': + return VKD3D_DATA_OPAQUE; + case 'R': + return VKD3D_DATA_RESOURCE; + case 'S': + return VKD3D_DATA_SAMPLER; + case 'U': + return VKD3D_DATA_UAV; + default: + ERR("Invalid data type '%c'.\n", t); + return VKD3D_DATA_FLOAT; + } +} + +static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + + shader_instruction_array_destroy(&parser->instructions); + free_shader_desc(&parser->shader_desc); + vkd3d_free(sm4); +} + +static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) +{ + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) + { + struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); + + if (!(reg_idx->rel_addr = rel_addr)) + { + ERR("Failed to get src param for relative addressing.\n"); + return false; + } + + if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) + reg_idx->offset = *(*ptr)++; + else + reg_idx->offset = 0; + shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); + } + else + { + reg_idx->rel_addr = NULL; + reg_idx->offset = *(*ptr)++; + } + + return true; +} + +static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) +{ + switch (register_type) + { + case VKD3D_SM4_RT_SAMPLER: + case VKD3D_SM4_RT_RESOURCE: + case VKD3D_SM4_RT_CONSTBUFFER: + case VKD3D_SM5_RT_UAV: + return true; + + default: + return false; + } +} + +static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, + enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) +{ + enum vkd3d_sm4_register_precision precision; + enum vkd3d_sm4_register_type register_type; + enum vkd3d_sm4_extended_operand_type type; + enum vkd3d_sm4_register_modifier m; + uint32_t token, order, extended; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = *(*ptr)++; + + register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; + if (register_type >= ARRAY_SIZE(register_type_table) + || register_type_table[register_type] == VKD3DSPR_INVALID) + { + FIXME("Unhandled register type %#x.\n", register_type); + param->type = VKD3DSPR_TEMP; + } + else + { + param->type = register_type_table[register_type]; + } + param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + param->non_uniform = false; + param->data_type = data_type; + + *modifier = VKD3DSPSM_NONE; + if (token & VKD3D_SM4_EXTENDED_OPERAND) + { + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + extended = *(*ptr)++; + + if (extended & VKD3D_SM4_EXTENDED_OPERAND) + { + FIXME("Skipping second-order extended operand.\n"); + *ptr += *ptr < end; + } + + type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; + if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) + { + m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; + switch (m) + { + case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: + *modifier = VKD3DSPSM_NEG; + break; + + case VKD3D_SM4_REGISTER_MODIFIER_ABS: + *modifier = VKD3DSPSM_ABS; + break; + + case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: + *modifier = VKD3DSPSM_ABSNEG; + break; + + default: + FIXME("Unhandled register modifier %#x.\n", m); + /* fall-through */ + case VKD3D_SM4_REGISTER_MODIFIER_NONE: + break; + } + + precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; + if (precision >= ARRAY_SIZE(register_precision_table) + || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) + { + FIXME("Unhandled register precision %#x.\n", precision); + param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; + } + else + { + param->precision = register_precision_table[precision]; + } + + if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) + param->non_uniform = true; + + extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK + | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK + | VKD3D_SM4_EXTENDED_OPERAND); + if (extended) + FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); + } + else if (type) + { + FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); + } + } + + order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; + + if (order < 1) + { + param->idx[0].offset = ~0u; + param->idx[0].rel_addr = NULL; + } + else + { + DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[0]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order < 2) + { + param->idx[1].offset = ~0u; + param->idx[1].rel_addr = NULL; + } + else + { + DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[1]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order < 3) + { + param->idx[2].offset = ~0u; + param->idx[2].rel_addr = NULL; + } + else + { + DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[2]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order > 3) + { + WARN("Unhandled order %u.\n", order); + return false; + } + + if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) + { + enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; + unsigned int dword_count; + + switch (dimension) + { + case VKD3D_SM4_DIMENSION_SCALAR: + param->immconst_type = VKD3D_IMMCONST_SCALAR; + dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); + if (end - *ptr < dword_count) + { + WARN("Invalid ptr %p, end %p.\n", *ptr, end); + return false; + } + memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); + *ptr += dword_count; + break; + + case VKD3D_SM4_DIMENSION_VEC4: + param->immconst_type = VKD3D_IMMCONST_VEC4; + if (end - *ptr < VKD3D_VEC4_SIZE) + { + WARN("Invalid ptr %p, end %p.\n", *ptr, end); + return false; + } + memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); + *ptr += 4; + break; + + default: + FIXME("Unhandled dimension %#x.\n", dimension); + break; + } + } + else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) + { + /* SM5.1 places a symbol identifier in idx[0] and moves + * other values up one slot. Normalize to SM5.1. */ + param->idx[2] = param->idx[1]; + param->idx[1] = param->idx[0]; + } + + map_register(priv, param); + + return true; +} + +static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) +{ + switch (reg->type) + { + case VKD3DSPR_COVERAGE: + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_PRIMID: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_OUTSTENCILREF: + return true; + default: + return false; + } +} + +static uint32_t swizzle_from_sm4(uint32_t s) +{ + return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); +} + +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) +{ + DWORD token; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = **ptr; + + if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) + { + ERR("Failed to read parameter.\n"); + return false; + } + + if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) + { + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } + else + { + enum vkd3d_sm4_swizzle_type swizzle_type = + (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + + switch (swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: + if (shader_sm4_is_scalar_register(&src_param->reg)) + src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + else + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + break; + + case VKD3D_SM4_SWIZZLE_SCALAR: + src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; + src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); + break; + + default: + FIXME("Unhandled swizzle type %#x.\n", swizzle_type); + break; + } + } + + return true; +} + +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) +{ + enum vkd3d_shader_src_modifier modifier; + DWORD token; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = **ptr; + + if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) + { + ERR("Failed to read parameter.\n"); + return false; + } + + if (modifier != VKD3DSPSM_NONE) + { + ERR("Invalid source modifier %#x on destination register.\n", modifier); + return false; + } + + dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + if (data_type == VKD3D_DATA_DOUBLE) + dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); + /* Scalar registers are declared with no write mask in shader bytecode. */ + if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + dst_param->modifiers = 0; + dst_param->shift = 0; + + return true; +} + +static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) +{ + enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; + + switch (modifier_type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: + { + static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER + | VKD3D_SM4_MODIFIER_MASK + | VKD3D_SM4_AOFFIMMI_U_MASK + | VKD3D_SM4_AOFFIMMI_V_MASK + | VKD3D_SM4_AOFFIMMI_W_MASK; + + /* Bit fields are used for sign extension. */ + struct + { + int u : 4; + int v : 4; + int w : 4; + } aoffimmi; + + if (modifier & ~recognized_bits) + FIXME("Unhandled instruction modifier %#x.\n", modifier); + + aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; + aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; + aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; + ins->texel_offset.u = aoffimmi.u; + ins->texel_offset.v = aoffimmi.v; + ins->texel_offset.w = aoffimmi.w; + break; + } + + case VKD3D_SM5_MODIFIER_DATA_TYPE: + { + DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; + unsigned int i; + + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + ins->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + ins->resource_data_type[i] = data_type_table[data_type]; + } + } + break; + } + + case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: + { + enum vkd3d_sm4_resource_type resource_type + = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; + + if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) + ins->raw = true; + else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) + ins->structured = true; + + if (resource_type < ARRAY_SIZE(resource_type_table)) + ins->resource_type = resource_type_table[resource_type]; + else + { + FIXME("Unhandled resource type %#x.\n", resource_type); + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + + ins->resource_stride + = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; + break; + } + + default: + FIXME("Unhandled instruction modifier %#x.\n", modifier); + } +} + +static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_sm4_opcode_info *opcode_info; + uint32_t opcode_token, opcode, previous_token; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; + const uint32_t **ptr = &sm4->ptr; + unsigned int i, len; + size_t remaining; + const uint32_t *p; + DWORD precise; + + if (*ptr >= sm4->end) + { + WARN("End of byte-code, failed to read opcode.\n"); + goto fail; + } + remaining = sm4->end - *ptr; + + ++sm4->p.location.line; + + opcode_token = *(*ptr)++; + opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; + + len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); + if (!len) + { + if (remaining < 2) + { + WARN("End of byte-code, failed to read length token.\n"); + goto fail; + } + len = **ptr; + } + if (!len || remaining < len) + { + WARN("Read invalid length %u (remaining %zu).\n", len, remaining); + goto fail; + } + --len; + + if (!(opcode_info = get_opcode_info(opcode))) + { + FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); + ins->handler_idx = VKD3DSIH_INVALID; + *ptr += len; + return; + } + + ins->handler_idx = opcode_info->handler_idx; + ins->flags = 0; + ins->coissue = false; + ins->raw = false; + ins->structured = false; + ins->predicate = NULL; + ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); + ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); + ins->src = src_params = shader_parser_get_src_params(&sm4->p, ins->src_count); + if (!src_params && ins->src_count) + { + ERR("Failed to allocate src parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + ins->resource_stride = 0; + ins->resource_data_type[0] = VKD3D_DATA_FLOAT; + ins->resource_data_type[1] = VKD3D_DATA_FLOAT; + ins->resource_data_type[2] = VKD3D_DATA_FLOAT; + ins->resource_data_type[3] = VKD3D_DATA_FLOAT; + memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + + p = *ptr; + *ptr += len; + + if (opcode_info->read_opcode_func) + { + ins->dst = NULL; + ins->dst_count = 0; + opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); + } + else + { + enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; + + previous_token = opcode_token; + while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) + shader_sm4_read_instruction_modifier(previous_token = *p++, ins); + + ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) + { + ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; + instruction_dst_modifier = VKD3DSPDM_SATURATE; + } + precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; + ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; + + ins->dst = dst_params = shader_parser_get_dst_params(&sm4->p, ins->dst_count); + if (!dst_params && ins->dst_count) + { + ERR("Failed to allocate dst parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + for (i = 0; i < ins->dst_count; ++i) + { + if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), + &dst_params[i]))) + { + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + dst_params[i].modifiers |= instruction_dst_modifier; + } + + for (i = 0; i < ins->src_count; ++i) + { + if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), + &src_params[i]))) + { + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + } + } + + return; + +fail: + *ptr = sm4->end; + ins->handler_idx = VKD3DSIH_INVALID; + return; +} + +static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = +{ + .parser_destroy = shader_sm4_destroy, +}; + +static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, + size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_version version; + uint32_t version_token, token_count; + unsigned int i; + + if (byte_code_size / sizeof(*byte_code) < 2) + { + WARN("Invalid byte code size %lu.\n", (long)byte_code_size); + return false; + } + + version_token = byte_code[0]; + TRACE("Version: 0x%08x.\n", version_token); + token_count = byte_code[1]; + TRACE("Token count: %u.\n", token_count); + + if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) + { + WARN("Invalid token count %u.\n", token_count); + return false; + } + + sm4->start = &byte_code[2]; + sm4->end = &byte_code[token_count]; + + switch (version_token >> 16) + { + case VKD3D_SM4_PS: + version.type = VKD3D_SHADER_TYPE_PIXEL; + break; + + case VKD3D_SM4_VS: + version.type = VKD3D_SHADER_TYPE_VERTEX; + break; + + case VKD3D_SM4_GS: + version.type = VKD3D_SHADER_TYPE_GEOMETRY; + break; + + case VKD3D_SM5_HS: + version.type = VKD3D_SHADER_TYPE_HULL; + break; + + case VKD3D_SM5_DS: + version.type = VKD3D_SHADER_TYPE_DOMAIN; + break; + + case VKD3D_SM5_CS: + version.type = VKD3D_SHADER_TYPE_COMPUTE; + break; + + default: + FIXME("Unrecognised shader type %#x.\n", version_token >> 16); + } + version.major = VKD3D_SM4_VERSION_MAJOR(version_token); + version.minor = VKD3D_SM4_VERSION_MINOR(version_token); + + /* Estimate instruction count to avoid reallocation in most shaders. */ + if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, + token_count / 7u + 20)) + return false; + sm4->ptr = sm4->start; + + memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); + for (i = 0; i < output_signature->element_count; ++i) + { + struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; + + if (version.type == VKD3D_SHADER_TYPE_PIXEL + && ascii_strcasecmp(e->semantic_name, "SV_Target")) + continue; + if (e->register_index >= ARRAY_SIZE(sm4->output_map)) + { + WARN("Invalid output index %u.\n", e->register_index); + continue; + } + + sm4->output_map[e->register_index] = e->semantic_index; + } + + return true; +} + +int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_instruction_array *instructions; + struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_sm4_parser *sm4; + int ret; + + if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + shader_desc = &sm4->p.shader_desc; + if ((ret = shader_extract_from_dxbc(&compile_info->source, + message_context, compile_info->source_name, shader_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm4); + return ret; + } + + if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, + compile_info->source_name, &shader_desc->output_signature, message_context)) + { + WARN("Failed to initialise shader parser.\n"); + free_shader_desc(shader_desc); + vkd3d_free(sm4); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + instructions = &sm4->p.instructions; + while (sm4->ptr != sm4->end) + { + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ERR("Failed to allocate instructions.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + shader_sm4_destroy(&sm4->p); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ins = &instructions->elements[instructions->count]; + shader_sm4_read_instruction(sm4, ins); + + if (ins->handler_idx == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + shader_sm4_destroy(&sm4->p); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ++instructions->count; + } + + *parser = &sm4->p; + + return VKD3D_OK; +}
static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block);
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index e635a70b..469139b5 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1132,6 +1132,8 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi
void free_shader_desc(struct vkd3d_shader_desc *desc);
+int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature);
From: Henri Verbeet hverbeet@codeweavers.com
--- Makefile.am | 2 +- libs/vkd3d-shader/{hlsl_sm4.c => tpf.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename libs/vkd3d-shader/{hlsl_sm4.c => tpf.c} (100%)
diff --git a/Makefile.am b/Makefile.am index 8cbd9662..e8c5ab47 100644 --- a/Makefile.am +++ b/Makefile.am @@ -257,11 +257,11 @@ libvkd3d_shader_la_SOURCES = \ libs/vkd3d-shader/hlsl_codegen.c \ libs/vkd3d-shader/hlsl_constant_ops.c \ libs/vkd3d-shader/hlsl_sm1.c \ - libs/vkd3d-shader/hlsl_sm4.c \ libs/vkd3d-shader/ir.c \ libs/vkd3d-shader/preproc.h \ libs/vkd3d-shader/sm4.h \ libs/vkd3d-shader/spirv.c \ + libs/vkd3d-shader/tpf.c \ libs/vkd3d-shader/trace.c \ libs/vkd3d-shader/vkd3d_shader.map \ libs/vkd3d-shader/vkd3d_shader_main.c \ diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/tpf.c similarity index 100% rename from libs/vkd3d-shader/hlsl_sm4.c rename to libs/vkd3d-shader/tpf.c
From: Henri Verbeet hverbeet@codeweavers.com
--- Makefile.am | 1 - libs/vkd3d-shader/hlsl.h | 4 +- libs/vkd3d-shader/sm4.h | 552 --------------------------------------- libs/vkd3d-shader/tpf.c | 530 +++++++++++++++++++++++++++++++++++++ 4 files changed, 533 insertions(+), 554 deletions(-) delete mode 100644 libs/vkd3d-shader/sm4.h
diff --git a/Makefile.am b/Makefile.am index e8c5ab47..f2a96823 100644 --- a/Makefile.am +++ b/Makefile.am @@ -259,7 +259,6 @@ libvkd3d_shader_la_SOURCES = \ libs/vkd3d-shader/hlsl_sm1.c \ libs/vkd3d-shader/ir.c \ libs/vkd3d-shader/preproc.h \ - libs/vkd3d-shader/sm4.h \ libs/vkd3d-shader/spirv.c \ libs/vkd3d-shader/tpf.c \ libs/vkd3d-shader/trace.c \ diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index ce20a12c..f84bb62b 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -24,7 +24,9 @@ #include "rbtree.h" #include "vkd3d_d3dcommon.h" #include "vkd3d_d3dx9shader.h" -#include "sm4.h" + +enum vkd3d_sm4_register_type; +enum vkd3d_sm4_swizzle_type;
/* The general IR structure is inspired by Mesa GLSL hir, even though the code * ends up being quite different in practice. Anyway, here comes the relevant diff --git a/libs/vkd3d-shader/sm4.h b/libs/vkd3d-shader/sm4.h deleted file mode 100644 index 5ec4ee17..00000000 --- a/libs/vkd3d-shader/sm4.h +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright 2009 Henri Verbeet for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#ifndef __VKD3D_SM4_H -#define __VKD3D_SM4_H - -#define VKD3D_SM4_PS 0x0000u -#define VKD3D_SM4_VS 0x0001u -#define VKD3D_SM4_GS 0x0002u -#define VKD3D_SM5_HS 0x0003u -#define VKD3D_SM5_DS 0x0004u -#define VKD3D_SM5_CS 0x0005u -#define VKD3D_SM4_LIB 0xfff0u - -#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) - -#define VKD3D_SM4_MODIFIER_MASK 0x3fu - -#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 -#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) - -#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 -#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) - -#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 -#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) - -#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 -#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) -#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 -#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) -#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 -#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) - -#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 -#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) - -#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 -#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) - -#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 -#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) - -#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 -#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) - -#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 -#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) - -#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 -#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) - -#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 -#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) - -#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 -#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) - -#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 -#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) - -#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 -#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) - -#define VKD3D_SM5_PRECISE_SHIFT 19 -#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) - -#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 -#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) - -#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 -#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu - -#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 -#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) - -#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 -#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) - -#define VKD3D_SM5_TESSELLATOR_SHIFT 11 -#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) - -#define VKD3D_SM4_OPCODE_MASK 0xff - -#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) - -#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu - -#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 -#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) - -#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 -#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) - -#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 -#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) - -#define VKD3D_SM4_ADDRESSING_SHIFT2 28 -#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) - -#define VKD3D_SM4_ADDRESSING_SHIFT1 25 -#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) - -#define VKD3D_SM4_ADDRESSING_SHIFT0 22 -#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) - -#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 -#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) - -#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 -#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) - -#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 -#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) - -#define VKD3D_SM4_DIMENSION_SHIFT 0 -#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) - -#define VKD3D_SM4_WRITEMASK_SHIFT 4 -#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) - -#define VKD3D_SM4_SWIZZLE_SHIFT 4 -#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) - -#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) -#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) - -#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 -#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 - -#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 - -#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) - -#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) - -/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ -#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 - -enum vkd3d_sm4_opcode -{ - VKD3D_SM4_OP_ADD = 0x00, - VKD3D_SM4_OP_AND = 0x01, - VKD3D_SM4_OP_BREAK = 0x02, - VKD3D_SM4_OP_BREAKC = 0x03, - VKD3D_SM4_OP_CASE = 0x06, - VKD3D_SM4_OP_CONTINUE = 0x07, - VKD3D_SM4_OP_CONTINUEC = 0x08, - VKD3D_SM4_OP_CUT = 0x09, - VKD3D_SM4_OP_DEFAULT = 0x0a, - VKD3D_SM4_OP_DERIV_RTX = 0x0b, - VKD3D_SM4_OP_DERIV_RTY = 0x0c, - VKD3D_SM4_OP_DISCARD = 0x0d, - VKD3D_SM4_OP_DIV = 0x0e, - VKD3D_SM4_OP_DP2 = 0x0f, - VKD3D_SM4_OP_DP3 = 0x10, - VKD3D_SM4_OP_DP4 = 0x11, - VKD3D_SM4_OP_ELSE = 0x12, - VKD3D_SM4_OP_EMIT = 0x13, - VKD3D_SM4_OP_ENDIF = 0x15, - VKD3D_SM4_OP_ENDLOOP = 0x16, - VKD3D_SM4_OP_ENDSWITCH = 0x17, - VKD3D_SM4_OP_EQ = 0x18, - VKD3D_SM4_OP_EXP = 0x19, - VKD3D_SM4_OP_FRC = 0x1a, - VKD3D_SM4_OP_FTOI = 0x1b, - VKD3D_SM4_OP_FTOU = 0x1c, - VKD3D_SM4_OP_GE = 0x1d, - VKD3D_SM4_OP_IADD = 0x1e, - VKD3D_SM4_OP_IF = 0x1f, - VKD3D_SM4_OP_IEQ = 0x20, - VKD3D_SM4_OP_IGE = 0x21, - VKD3D_SM4_OP_ILT = 0x22, - VKD3D_SM4_OP_IMAD = 0x23, - VKD3D_SM4_OP_IMAX = 0x24, - VKD3D_SM4_OP_IMIN = 0x25, - VKD3D_SM4_OP_IMUL = 0x26, - VKD3D_SM4_OP_INE = 0x27, - VKD3D_SM4_OP_INEG = 0x28, - VKD3D_SM4_OP_ISHL = 0x29, - VKD3D_SM4_OP_ISHR = 0x2a, - VKD3D_SM4_OP_ITOF = 0x2b, - VKD3D_SM4_OP_LABEL = 0x2c, - VKD3D_SM4_OP_LD = 0x2d, - VKD3D_SM4_OP_LD2DMS = 0x2e, - VKD3D_SM4_OP_LOG = 0x2f, - VKD3D_SM4_OP_LOOP = 0x30, - VKD3D_SM4_OP_LT = 0x31, - VKD3D_SM4_OP_MAD = 0x32, - VKD3D_SM4_OP_MIN = 0x33, - VKD3D_SM4_OP_MAX = 0x34, - VKD3D_SM4_OP_SHADER_DATA = 0x35, - VKD3D_SM4_OP_MOV = 0x36, - VKD3D_SM4_OP_MOVC = 0x37, - VKD3D_SM4_OP_MUL = 0x38, - VKD3D_SM4_OP_NE = 0x39, - VKD3D_SM4_OP_NOP = 0x3a, - VKD3D_SM4_OP_NOT = 0x3b, - VKD3D_SM4_OP_OR = 0x3c, - VKD3D_SM4_OP_RESINFO = 0x3d, - VKD3D_SM4_OP_RET = 0x3e, - VKD3D_SM4_OP_RETC = 0x3f, - VKD3D_SM4_OP_ROUND_NE = 0x40, - VKD3D_SM4_OP_ROUND_NI = 0x41, - VKD3D_SM4_OP_ROUND_PI = 0x42, - VKD3D_SM4_OP_ROUND_Z = 0x43, - VKD3D_SM4_OP_RSQ = 0x44, - VKD3D_SM4_OP_SAMPLE = 0x45, - VKD3D_SM4_OP_SAMPLE_C = 0x46, - VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, - VKD3D_SM4_OP_SAMPLE_LOD = 0x48, - VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, - VKD3D_SM4_OP_SAMPLE_B = 0x4a, - VKD3D_SM4_OP_SQRT = 0x4b, - VKD3D_SM4_OP_SWITCH = 0x4c, - VKD3D_SM4_OP_SINCOS = 0x4d, - VKD3D_SM4_OP_UDIV = 0x4e, - VKD3D_SM4_OP_ULT = 0x4f, - VKD3D_SM4_OP_UGE = 0x50, - VKD3D_SM4_OP_UMUL = 0x51, - VKD3D_SM4_OP_UMAX = 0x53, - VKD3D_SM4_OP_UMIN = 0x54, - VKD3D_SM4_OP_USHR = 0x55, - VKD3D_SM4_OP_UTOF = 0x56, - VKD3D_SM4_OP_XOR = 0x57, - VKD3D_SM4_OP_DCL_RESOURCE = 0x58, - VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, - VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, - VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, - VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, - VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, - VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, - VKD3D_SM4_OP_DCL_INPUT = 0x5f, - VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, - VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, - VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, - VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, - VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, - VKD3D_SM4_OP_DCL_OUTPUT = 0x65, - VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, - VKD3D_SM4_OP_DCL_TEMPS = 0x68, - VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, - VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, - VKD3D_SM4_OP_LOD = 0x6c, - VKD3D_SM4_OP_GATHER4 = 0x6d, - VKD3D_SM4_OP_SAMPLE_POS = 0x6e, - VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, - VKD3D_SM5_OP_HS_DECLS = 0x71, - VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, - VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, - VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, - VKD3D_SM5_OP_EMIT_STREAM = 0x75, - VKD3D_SM5_OP_CUT_STREAM = 0x76, - VKD3D_SM5_OP_FCALL = 0x78, - VKD3D_SM5_OP_BUFINFO = 0x79, - VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, - VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, - VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, - VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, - VKD3D_SM5_OP_GATHER4_C = 0x7e, - VKD3D_SM5_OP_GATHER4_PO = 0x7f, - VKD3D_SM5_OP_GATHER4_PO_C = 0x80, - VKD3D_SM5_OP_RCP = 0x81, - VKD3D_SM5_OP_F32TOF16 = 0x82, - VKD3D_SM5_OP_F16TOF32 = 0x83, - VKD3D_SM5_OP_COUNTBITS = 0x86, - VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, - VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, - VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, - VKD3D_SM5_OP_UBFE = 0x8a, - VKD3D_SM5_OP_IBFE = 0x8b, - VKD3D_SM5_OP_BFI = 0x8c, - VKD3D_SM5_OP_BFREV = 0x8d, - VKD3D_SM5_OP_SWAPC = 0x8e, - VKD3D_SM5_OP_DCL_STREAM = 0x8f, - VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, - VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, - VKD3D_SM5_OP_DCL_INTERFACE = 0x92, - VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, - VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, - VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, - VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, - VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, - VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, - VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, - VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, - VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, - VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, - VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, - VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, - VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, - VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, - VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, - VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, - VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, - VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, - VKD3D_SM5_OP_LD_RAW = 0xa5, - VKD3D_SM5_OP_STORE_RAW = 0xa6, - VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, - VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, - VKD3D_SM5_OP_ATOMIC_AND = 0xa9, - VKD3D_SM5_OP_ATOMIC_OR = 0xaa, - VKD3D_SM5_OP_ATOMIC_XOR = 0xab, - VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, - VKD3D_SM5_OP_ATOMIC_IADD = 0xad, - VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, - VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, - VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, - VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, - VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, - VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, - VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, - VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, - VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, - VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, - VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, - VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, - VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, - VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, - VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, - VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, - VKD3D_SM5_OP_SYNC = 0xbe, - VKD3D_SM5_OP_DADD = 0xbf, - VKD3D_SM5_OP_DMAX = 0xc0, - VKD3D_SM5_OP_DMIN = 0xc1, - VKD3D_SM5_OP_DMUL = 0xc2, - VKD3D_SM5_OP_DEQ = 0xc3, - VKD3D_SM5_OP_DGE = 0xc4, - VKD3D_SM5_OP_DLT = 0xc5, - VKD3D_SM5_OP_DNE = 0xc6, - VKD3D_SM5_OP_DMOV = 0xc7, - VKD3D_SM5_OP_DMOVC = 0xc8, - VKD3D_SM5_OP_DTOF = 0xc9, - VKD3D_SM5_OP_FTOD = 0xca, - VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, - VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, - VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, - VKD3D_SM5_OP_DDIV = 0xd2, - VKD3D_SM5_OP_DFMA = 0xd3, - VKD3D_SM5_OP_DRCP = 0xd4, - VKD3D_SM5_OP_MSAD = 0xd5, - VKD3D_SM5_OP_DTOI = 0xd6, - VKD3D_SM5_OP_DTOU = 0xd7, - VKD3D_SM5_OP_ITOD = 0xd8, - VKD3D_SM5_OP_UTOD = 0xd9, - VKD3D_SM5_OP_GATHER4_S = 0xdb, - VKD3D_SM5_OP_GATHER4_C_S = 0xdc, - VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, - VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, - VKD3D_SM5_OP_LD_S = 0xdf, - VKD3D_SM5_OP_LD2DMS_S = 0xe0, - VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, - VKD3D_SM5_OP_LD_RAW_S = 0xe2, - VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, - VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, - VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, - VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, - VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, - VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, - VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, - VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, -}; - -enum vkd3d_sm4_instruction_modifier -{ - VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, - VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, - VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, -}; - -enum vkd3d_sm4_register_type -{ - VKD3D_SM4_RT_TEMP = 0x00, - VKD3D_SM4_RT_INPUT = 0x01, - VKD3D_SM4_RT_OUTPUT = 0x02, - VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, - VKD3D_SM4_RT_IMMCONST = 0x04, - VKD3D_SM4_RT_IMMCONST64 = 0x05, - VKD3D_SM4_RT_SAMPLER = 0x06, - VKD3D_SM4_RT_RESOURCE = 0x07, - VKD3D_SM4_RT_CONSTBUFFER = 0x08, - VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, - VKD3D_SM4_RT_PRIMID = 0x0b, - VKD3D_SM4_RT_DEPTHOUT = 0x0c, - VKD3D_SM4_RT_NULL = 0x0d, - VKD3D_SM4_RT_RASTERIZER = 0x0e, - VKD3D_SM4_RT_OMASK = 0x0f, - VKD3D_SM5_RT_STREAM = 0x10, - VKD3D_SM5_RT_FUNCTION_BODY = 0x11, - VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, - VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, - VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, - VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, - VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, - VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, - VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, - VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, - VKD3D_SM5_RT_UAV = 0x1e, - VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, - VKD3D_SM5_RT_THREAD_ID = 0x20, - VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, - VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, - VKD3D_SM5_RT_COVERAGE = 0x23, - VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, - VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, - VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, - VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, - VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, -}; - -enum vkd3d_sm4_extended_operand_type -{ - VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, - VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, -}; - -enum vkd3d_sm4_register_modifier -{ - VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, - VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, - VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, - VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, -}; - -enum vkd3d_sm4_register_precision -{ - VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, - VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, - VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, - VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, - VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, -}; - -enum vkd3d_sm4_output_primitive_type -{ - VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, - VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, - VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, -}; - -enum vkd3d_sm4_input_primitive_type -{ - VKD3D_SM4_INPUT_PT_POINT = 0x01, - VKD3D_SM4_INPUT_PT_LINE = 0x02, - VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, - VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, - VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, - VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, - VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, - VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, - VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, - VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, - VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, - VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, - VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, - VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, - VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, - VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, - VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, - VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, - VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, - VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, - VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, - VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, - VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, - VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, - VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, - VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, - VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, - VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, - VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, - VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, - VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, - VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, - VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, - VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, - VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, - VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, - VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, -}; - -enum vkd3d_sm4_swizzle_type -{ - VKD3D_SM4_SWIZZLE_NONE = 0x0, - VKD3D_SM4_SWIZZLE_VEC4 = 0x1, - VKD3D_SM4_SWIZZLE_SCALAR = 0x2, -}; - -enum vkd3d_sm4_dimension -{ - VKD3D_SM4_DIMENSION_NONE = 0x0, - VKD3D_SM4_DIMENSION_SCALAR = 0x1, - VKD3D_SM4_DIMENSION_VEC4 = 0x2, -}; - -enum vkd3d_sm4_resource_type -{ - VKD3D_SM4_RESOURCE_BUFFER = 0x1, - VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, - VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, - VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, - VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, - VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, - VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, - VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, - VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, - VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, - VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, - VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, -}; - -enum vkd3d_sm4_data_type -{ - VKD3D_SM4_DATA_UNORM = 0x1, - VKD3D_SM4_DATA_SNORM = 0x2, - VKD3D_SM4_DATA_INT = 0x3, - VKD3D_SM4_DATA_UINT = 0x4, - VKD3D_SM4_DATA_FLOAT = 0x5, - VKD3D_SM4_DATA_MIXED = 0x6, - VKD3D_SM4_DATA_DOUBLE = 0x7, - VKD3D_SM4_DATA_CONTINUED = 0x8, - VKD3D_SM4_DATA_UNUSED = 0x9, -}; - -enum vkd3d_sm4_sampler_mode -{ - VKD3D_SM4_SAMPLER_DEFAULT = 0x0, - VKD3D_SM4_SAMPLER_COMPARISON = 0x1, -}; - -enum vkd3d_sm4_shader_data_type -{ - VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, - VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, -}; - -#endif /* __VKD3D_SM4_H */ diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 91aba46a..80487060 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -28,6 +28,536 @@
STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT);
+#define VKD3D_SM4_PS 0x0000u +#define VKD3D_SM4_VS 0x0001u +#define VKD3D_SM4_GS 0x0002u +#define VKD3D_SM5_HS 0x0003u +#define VKD3D_SM5_DS 0x0004u +#define VKD3D_SM5_CS 0x0005u +#define VKD3D_SM4_LIB 0xfff0u + +#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) + +#define VKD3D_SM4_MODIFIER_MASK 0x3fu + +#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) + +#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 +#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) +#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 +#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) +#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 +#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 +#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 +#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) + +#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 +#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 +#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) + +#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 +#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) + +#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 +#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) + +#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 +#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) + +#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 +#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) + +#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 +#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) + +#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 +#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) + +#define VKD3D_SM5_PRECISE_SHIFT 19 +#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) + +#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 +#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) + +#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 +#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu + +#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 +#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) + +#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 +#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) + +#define VKD3D_SM5_TESSELLATOR_SHIFT 11 +#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) + +#define VKD3D_SM4_OPCODE_MASK 0xff + +#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) + +#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu + +#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 +#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + +#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 +#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) + +#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 +#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) + +#define VKD3D_SM4_ADDRESSING_SHIFT2 28 +#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) + +#define VKD3D_SM4_ADDRESSING_SHIFT1 25 +#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) + +#define VKD3D_SM4_ADDRESSING_SHIFT0 22 +#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) + +#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 +#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) + +#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 +#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) + +#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 +#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) + +#define VKD3D_SM4_DIMENSION_SHIFT 0 +#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) + +#define VKD3D_SM4_WRITEMASK_SHIFT 4 +#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) + +#define VKD3D_SM4_SWIZZLE_SHIFT 4 +#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) + +#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 +#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 + +#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 + +#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) + +#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) + +/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ +#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 + +enum vkd3d_sm4_opcode +{ + VKD3D_SM4_OP_ADD = 0x00, + VKD3D_SM4_OP_AND = 0x01, + VKD3D_SM4_OP_BREAK = 0x02, + VKD3D_SM4_OP_BREAKC = 0x03, + VKD3D_SM4_OP_CASE = 0x06, + VKD3D_SM4_OP_CONTINUE = 0x07, + VKD3D_SM4_OP_CONTINUEC = 0x08, + VKD3D_SM4_OP_CUT = 0x09, + VKD3D_SM4_OP_DEFAULT = 0x0a, + VKD3D_SM4_OP_DERIV_RTX = 0x0b, + VKD3D_SM4_OP_DERIV_RTY = 0x0c, + VKD3D_SM4_OP_DISCARD = 0x0d, + VKD3D_SM4_OP_DIV = 0x0e, + VKD3D_SM4_OP_DP2 = 0x0f, + VKD3D_SM4_OP_DP3 = 0x10, + VKD3D_SM4_OP_DP4 = 0x11, + VKD3D_SM4_OP_ELSE = 0x12, + VKD3D_SM4_OP_EMIT = 0x13, + VKD3D_SM4_OP_ENDIF = 0x15, + VKD3D_SM4_OP_ENDLOOP = 0x16, + VKD3D_SM4_OP_ENDSWITCH = 0x17, + VKD3D_SM4_OP_EQ = 0x18, + VKD3D_SM4_OP_EXP = 0x19, + VKD3D_SM4_OP_FRC = 0x1a, + VKD3D_SM4_OP_FTOI = 0x1b, + VKD3D_SM4_OP_FTOU = 0x1c, + VKD3D_SM4_OP_GE = 0x1d, + VKD3D_SM4_OP_IADD = 0x1e, + VKD3D_SM4_OP_IF = 0x1f, + VKD3D_SM4_OP_IEQ = 0x20, + VKD3D_SM4_OP_IGE = 0x21, + VKD3D_SM4_OP_ILT = 0x22, + VKD3D_SM4_OP_IMAD = 0x23, + VKD3D_SM4_OP_IMAX = 0x24, + VKD3D_SM4_OP_IMIN = 0x25, + VKD3D_SM4_OP_IMUL = 0x26, + VKD3D_SM4_OP_INE = 0x27, + VKD3D_SM4_OP_INEG = 0x28, + VKD3D_SM4_OP_ISHL = 0x29, + VKD3D_SM4_OP_ISHR = 0x2a, + VKD3D_SM4_OP_ITOF = 0x2b, + VKD3D_SM4_OP_LABEL = 0x2c, + VKD3D_SM4_OP_LD = 0x2d, + VKD3D_SM4_OP_LD2DMS = 0x2e, + VKD3D_SM4_OP_LOG = 0x2f, + VKD3D_SM4_OP_LOOP = 0x30, + VKD3D_SM4_OP_LT = 0x31, + VKD3D_SM4_OP_MAD = 0x32, + VKD3D_SM4_OP_MIN = 0x33, + VKD3D_SM4_OP_MAX = 0x34, + VKD3D_SM4_OP_SHADER_DATA = 0x35, + VKD3D_SM4_OP_MOV = 0x36, + VKD3D_SM4_OP_MOVC = 0x37, + VKD3D_SM4_OP_MUL = 0x38, + VKD3D_SM4_OP_NE = 0x39, + VKD3D_SM4_OP_NOP = 0x3a, + VKD3D_SM4_OP_NOT = 0x3b, + VKD3D_SM4_OP_OR = 0x3c, + VKD3D_SM4_OP_RESINFO = 0x3d, + VKD3D_SM4_OP_RET = 0x3e, + VKD3D_SM4_OP_RETC = 0x3f, + VKD3D_SM4_OP_ROUND_NE = 0x40, + VKD3D_SM4_OP_ROUND_NI = 0x41, + VKD3D_SM4_OP_ROUND_PI = 0x42, + VKD3D_SM4_OP_ROUND_Z = 0x43, + VKD3D_SM4_OP_RSQ = 0x44, + VKD3D_SM4_OP_SAMPLE = 0x45, + VKD3D_SM4_OP_SAMPLE_C = 0x46, + VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, + VKD3D_SM4_OP_SAMPLE_LOD = 0x48, + VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, + VKD3D_SM4_OP_SAMPLE_B = 0x4a, + VKD3D_SM4_OP_SQRT = 0x4b, + VKD3D_SM4_OP_SWITCH = 0x4c, + VKD3D_SM4_OP_SINCOS = 0x4d, + VKD3D_SM4_OP_UDIV = 0x4e, + VKD3D_SM4_OP_ULT = 0x4f, + VKD3D_SM4_OP_UGE = 0x50, + VKD3D_SM4_OP_UMUL = 0x51, + VKD3D_SM4_OP_UMAX = 0x53, + VKD3D_SM4_OP_UMIN = 0x54, + VKD3D_SM4_OP_USHR = 0x55, + VKD3D_SM4_OP_UTOF = 0x56, + VKD3D_SM4_OP_XOR = 0x57, + VKD3D_SM4_OP_DCL_RESOURCE = 0x58, + VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, + VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, + VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, + VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, + VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, + VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, + VKD3D_SM4_OP_DCL_INPUT = 0x5f, + VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, + VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, + VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, + VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, + VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, + VKD3D_SM4_OP_DCL_OUTPUT = 0x65, + VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, + VKD3D_SM4_OP_DCL_TEMPS = 0x68, + VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, + VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, + VKD3D_SM4_OP_LOD = 0x6c, + VKD3D_SM4_OP_GATHER4 = 0x6d, + VKD3D_SM4_OP_SAMPLE_POS = 0x6e, + VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, + VKD3D_SM5_OP_HS_DECLS = 0x71, + VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, + VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, + VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, + VKD3D_SM5_OP_EMIT_STREAM = 0x75, + VKD3D_SM5_OP_CUT_STREAM = 0x76, + VKD3D_SM5_OP_FCALL = 0x78, + VKD3D_SM5_OP_BUFINFO = 0x79, + VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, + VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, + VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, + VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, + VKD3D_SM5_OP_GATHER4_C = 0x7e, + VKD3D_SM5_OP_GATHER4_PO = 0x7f, + VKD3D_SM5_OP_GATHER4_PO_C = 0x80, + VKD3D_SM5_OP_RCP = 0x81, + VKD3D_SM5_OP_F32TOF16 = 0x82, + VKD3D_SM5_OP_F16TOF32 = 0x83, + VKD3D_SM5_OP_COUNTBITS = 0x86, + VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, + VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, + VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, + VKD3D_SM5_OP_UBFE = 0x8a, + VKD3D_SM5_OP_IBFE = 0x8b, + VKD3D_SM5_OP_BFI = 0x8c, + VKD3D_SM5_OP_BFREV = 0x8d, + VKD3D_SM5_OP_SWAPC = 0x8e, + VKD3D_SM5_OP_DCL_STREAM = 0x8f, + VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, + VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, + VKD3D_SM5_OP_DCL_INTERFACE = 0x92, + VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, + VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, + VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, + VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, + VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, + VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, + VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, + VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, + VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, + VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, + VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, + VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, + VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, + VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, + VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, + VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, + VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, + VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, + VKD3D_SM5_OP_LD_RAW = 0xa5, + VKD3D_SM5_OP_STORE_RAW = 0xa6, + VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, + VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, + VKD3D_SM5_OP_ATOMIC_AND = 0xa9, + VKD3D_SM5_OP_ATOMIC_OR = 0xaa, + VKD3D_SM5_OP_ATOMIC_XOR = 0xab, + VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, + VKD3D_SM5_OP_ATOMIC_IADD = 0xad, + VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, + VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, + VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, + VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, + VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, + VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, + VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, + VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, + VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, + VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, + VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, + VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, + VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, + VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, + VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, + VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, + VKD3D_SM5_OP_SYNC = 0xbe, + VKD3D_SM5_OP_DADD = 0xbf, + VKD3D_SM5_OP_DMAX = 0xc0, + VKD3D_SM5_OP_DMIN = 0xc1, + VKD3D_SM5_OP_DMUL = 0xc2, + VKD3D_SM5_OP_DEQ = 0xc3, + VKD3D_SM5_OP_DGE = 0xc4, + VKD3D_SM5_OP_DLT = 0xc5, + VKD3D_SM5_OP_DNE = 0xc6, + VKD3D_SM5_OP_DMOV = 0xc7, + VKD3D_SM5_OP_DMOVC = 0xc8, + VKD3D_SM5_OP_DTOF = 0xc9, + VKD3D_SM5_OP_FTOD = 0xca, + VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, + VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, + VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, + VKD3D_SM5_OP_DDIV = 0xd2, + VKD3D_SM5_OP_DFMA = 0xd3, + VKD3D_SM5_OP_DRCP = 0xd4, + VKD3D_SM5_OP_MSAD = 0xd5, + VKD3D_SM5_OP_DTOI = 0xd6, + VKD3D_SM5_OP_DTOU = 0xd7, + VKD3D_SM5_OP_ITOD = 0xd8, + VKD3D_SM5_OP_UTOD = 0xd9, + VKD3D_SM5_OP_GATHER4_S = 0xdb, + VKD3D_SM5_OP_GATHER4_C_S = 0xdc, + VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, + VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, + VKD3D_SM5_OP_LD_S = 0xdf, + VKD3D_SM5_OP_LD2DMS_S = 0xe0, + VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, + VKD3D_SM5_OP_LD_RAW_S = 0xe2, + VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, + VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, + VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, + VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, + VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, + VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, + VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, + VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, +}; + +enum vkd3d_sm4_instruction_modifier +{ + VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, + VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, + VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, +}; + +enum vkd3d_sm4_register_type +{ + VKD3D_SM4_RT_TEMP = 0x00, + VKD3D_SM4_RT_INPUT = 0x01, + VKD3D_SM4_RT_OUTPUT = 0x02, + VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, + VKD3D_SM4_RT_IMMCONST = 0x04, + VKD3D_SM4_RT_IMMCONST64 = 0x05, + VKD3D_SM4_RT_SAMPLER = 0x06, + VKD3D_SM4_RT_RESOURCE = 0x07, + VKD3D_SM4_RT_CONSTBUFFER = 0x08, + VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, + VKD3D_SM4_RT_PRIMID = 0x0b, + VKD3D_SM4_RT_DEPTHOUT = 0x0c, + VKD3D_SM4_RT_NULL = 0x0d, + VKD3D_SM4_RT_RASTERIZER = 0x0e, + VKD3D_SM4_RT_OMASK = 0x0f, + VKD3D_SM5_RT_STREAM = 0x10, + VKD3D_SM5_RT_FUNCTION_BODY = 0x11, + VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, + VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, + VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, + VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, + VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, + VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, + VKD3D_SM5_RT_UAV = 0x1e, + VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, + VKD3D_SM5_RT_THREAD_ID = 0x20, + VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, + VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, + VKD3D_SM5_RT_COVERAGE = 0x23, + VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, + VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, + VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, +}; + +enum vkd3d_sm4_extended_operand_type +{ + VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, + VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, +}; + +enum vkd3d_sm4_register_modifier +{ + VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, + VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, + VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, + VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, +}; + +enum vkd3d_sm4_register_precision +{ + VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, + VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, + VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, +}; + +enum vkd3d_sm4_output_primitive_type +{ + VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, + VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, + VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, +}; + +enum vkd3d_sm4_input_primitive_type +{ + VKD3D_SM4_INPUT_PT_POINT = 0x01, + VKD3D_SM4_INPUT_PT_LINE = 0x02, + VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, + VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, + VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, + VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, + VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, + VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, + VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, + VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, + VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, + VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, + VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, + VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, + VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, + VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, + VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, + VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, + VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, + VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, + VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, + VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, + VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, + VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, + VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, + VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, + VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, + VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, + VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, + VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, + VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, + VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, + VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, + VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, + VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, + VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, + VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, +}; + +enum vkd3d_sm4_swizzle_type +{ + VKD3D_SM4_SWIZZLE_NONE = 0x0, + VKD3D_SM4_SWIZZLE_VEC4 = 0x1, + VKD3D_SM4_SWIZZLE_SCALAR = 0x2, +}; + +enum vkd3d_sm4_dimension +{ + VKD3D_SM4_DIMENSION_NONE = 0x0, + VKD3D_SM4_DIMENSION_SCALAR = 0x1, + VKD3D_SM4_DIMENSION_VEC4 = 0x2, +}; + +enum vkd3d_sm4_resource_type +{ + VKD3D_SM4_RESOURCE_BUFFER = 0x1, + VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, + VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, + VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, + VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, + VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, + VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, + VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, + VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, + VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, + VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, + VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, +}; + +enum vkd3d_sm4_data_type +{ + VKD3D_SM4_DATA_UNORM = 0x1, + VKD3D_SM4_DATA_SNORM = 0x2, + VKD3D_SM4_DATA_INT = 0x3, + VKD3D_SM4_DATA_UINT = 0x4, + VKD3D_SM4_DATA_FLOAT = 0x5, + VKD3D_SM4_DATA_MIXED = 0x6, + VKD3D_SM4_DATA_DOUBLE = 0x7, + VKD3D_SM4_DATA_CONTINUED = 0x8, + VKD3D_SM4_DATA_UNUSED = 0x9, +}; + +enum vkd3d_sm4_sampler_mode +{ + VKD3D_SM4_SAMPLER_DEFAULT = 0x0, + VKD3D_SM4_SAMPLER_COMPARISON = 0x1, +}; + +enum vkd3d_sm4_shader_data_type +{ + VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, + VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, +}; + struct vkd3d_shader_sm4_parser { const uint32_t *start, *end, *ptr;
From: Henri Verbeet hverbeet@codeweavers.com
--- Makefile.am | 1 - libs/vkd3d-shader/d3dbc.c | 973 +++++++++++++++++++++++++++++++++- libs/vkd3d-shader/hlsl_sm1.c | 990 ----------------------------------- 3 files changed, 972 insertions(+), 992 deletions(-) delete mode 100644 libs/vkd3d-shader/hlsl_sm1.c
diff --git a/Makefile.am b/Makefile.am index f2a96823..549354b4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -256,7 +256,6 @@ libvkd3d_shader_la_SOURCES = \ libs/vkd3d-shader/hlsl.h \ libs/vkd3d-shader/hlsl_codegen.c \ libs/vkd3d-shader/hlsl_constant_ops.c \ - libs/vkd3d-shader/hlsl_sm1.c \ libs/vkd3d-shader/ir.c \ libs/vkd3d-shader/preproc.h \ libs/vkd3d-shader/spirv.c \ diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 5e772239..14268440 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1,4 +1,6 @@ /* + * d3dbc (Direct3D shader models 1-3 bytecode) support + * * Copyright 2002-2003 Jason Edmeades * Copyright 2002-2003 Raphael Junqueira * Copyright 2004 Christian Costa @@ -6,6 +8,7 @@ * Copyright 2006 Ivan Gyurdiev * Copyright 2007-2008 Stefan Dösinger for CodeWeavers * Copyright 2009, 2021 Henri Verbeet for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,7 +25,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */
-#include "vkd3d_shader_private.h" +#include "hlsl.h"
#define VKD3D_SM1_VS 0xfffeu #define VKD3D_SM1_PS 0xffffu @@ -988,3 +991,971 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi
return VKD3D_OK; } + +bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) +{ + unsigned int i; + + static const struct + { + const char *semantic; + bool output; + enum vkd3d_shader_type shader_type; + unsigned int major_version; + D3DSHADER_PARAM_REGISTER_TYPE type; + DWORD offset; + } + register_table[] = + { + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { + if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type + && ctx->profile->major_version == register_table[i].major_version) + { + *type = register_table[i].type; + if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) + *reg = register_table[i].offset; + else + *reg = semantic->index; + return true; + } + } + + return false; +} + +bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) +{ + static const struct + { + const char *name; + D3DDECLUSAGE usage; + } + semantics[] = + { + {"binormal", D3DDECLUSAGE_BINORMAL}, + {"blendindices", D3DDECLUSAGE_BLENDINDICES}, + {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, + {"color", D3DDECLUSAGE_COLOR}, + {"depth", D3DDECLUSAGE_DEPTH}, + {"fog", D3DDECLUSAGE_FOG}, + {"normal", D3DDECLUSAGE_NORMAL}, + {"position", D3DDECLUSAGE_POSITION}, + {"positiont", D3DDECLUSAGE_POSITIONT}, + {"psize", D3DDECLUSAGE_PSIZE}, + {"sample", D3DDECLUSAGE_SAMPLE}, + {"sv_depth", D3DDECLUSAGE_DEPTH}, + {"sv_position", D3DDECLUSAGE_POSITION}, + {"sv_target", D3DDECLUSAGE_COLOR}, + {"tangent", D3DDECLUSAGE_TANGENT}, + {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, + {"texcoord", D3DDECLUSAGE_TEXCOORD}, + }; + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { + if (!ascii_strcasecmp(semantic->name, semantics[i].name)) + { + *usage = semantics[i].usage; + *usage_idx = semantic->index; + return true; + } + } + + return false; +} + +static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) +{ + if (type == VKD3D_SHADER_TYPE_VERTEX) + return D3DVS_VERSION(major, minor); + else + return D3DPS_VERSION(major, minor); +} + +static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_ARRAY: + return sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3DXPC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->class); + vkd3d_unreachable(); + } +} + +static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3DXPT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_STRING: + return D3DXPT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_VERTEXSHADER: + return D3DXPT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3DXPT_VOID; + default: + vkd3d_unreachable(); + } +} + +static const struct hlsl_type *get_array_type(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return get_array_type(type->e.array.type); + return type; +} + +static unsigned int get_array_size(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return get_array_size(type->e.array.type) * type->e.array.elements_count; + return 1; +} + +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +{ + const struct hlsl_type *array_type = get_array_type(type); + unsigned int array_size = get_array_size(type); + unsigned int field_count = 0; + size_t fields_offset = 0; + size_t i; + + if (type->bytecode_offset) + return; + + if (array_type->class == HLSL_CLASS_STRUCT) + { + field_count = array_type->e.record.field_count; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm1_type(buffer, field->type, ctab_start); + } + + fields_offset = bytecode_align(buffer) - ctab_start; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); + } + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); +} + +static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (strcmp(to_sort->name, var->name) < 0) + { + list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm1_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform) + sm1_sort_extern(&sorted, var); + } + list_move_tail(&ctx->extern_vars, &sorted); +} + +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + struct hlsl_ir_function_decl *entry_func) +{ + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + unsigned int uniform_count = 0; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); + + if (!var->semantic.name && var->regs[regset].allocated) + { + ++uniform_count; + + if (var->is_param && var->is_uniform) + { + struct vkd3d_string_buffer *name; + + if (!(name = hlsl_get_string_buffer(ctx))) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + vkd3d_string_buffer_printf(name, "$%s", var->name); + vkd3d_free((char *)var->name); + var->name = hlsl_strdup(ctx, name->buffer); + hlsl_release_string_buffer(ctx, name); + } + } + } + + sm1_sort_externs(ctx); + + size_offset = put_u32(buffer, 0); + ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); + + ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); + creator_offset = put_u32(buffer, 0); + put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); + put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ + put_u32(buffer, 0); /* FIXME: flags */ + put_u32(buffer, 0); /* FIXME: target string */ + + vars_start = bytecode_align(buffer); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); + + if (!var->semantic.name && var->regs[regset].allocated) + { + put_u32(buffer, 0); /* name */ + if (var->data_type->class == HLSL_CLASS_OBJECT + && (var->data_type->base_type == HLSL_TYPE_SAMPLER + || var->data_type->base_type == HLSL_TYPE_TEXTURE)) + { + assert(regset == HLSL_REGSET_SAMPLERS); + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); + put_u32(buffer, 1); + } + else + { + assert(regset == HLSL_REGSET_NUMERIC); + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); + put_u32(buffer, var->data_type->reg_size[regset] / 4); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ + } + } + + uniform_count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); + + if (!var->semantic.name && var->regs[regset].allocated) + { + size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + size_t name_offset; + + name_offset = put_string(buffer, var->name); + set_u32(buffer, var_offset, name_offset - ctab_start); + + write_sm1_type(buffer, var->data_type, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + ++uniform_count; + } + } + + offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(buffer, creator_offset, offset - ctab_start); + + ctab_end = bytecode_align(buffer); + set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); +} + +static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +{ + return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +} + +struct sm1_instruction +{ + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + + struct sm1_dst_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_DSTMOD_TYPE mod; + unsigned int writemask; + uint32_t reg; + } dst; + + struct sm1_src_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_SRCMOD_TYPE mod; + unsigned int swizzle; + uint32_t reg; + } srcs[3]; + unsigned int src_count; + + unsigned int has_dst; +}; + +static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) +{ + assert(reg->writemask); + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); +} + +static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, + const struct sm1_src_register *reg) +{ + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); +} + +static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct sm1_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int i; + + if (ctx->profile->major_version > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + if (instr->has_dst) + write_sm1_dst_register(buffer, &instr->dst); + + for (i = 0; i < instr->src_count; ++i) + write_sm1_src_register(buffer, &instr->srcs[i]); +}; + +static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) +{ + src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); +} + +static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, + const struct hlsl_reg *src3) +{ + struct sm1_instruction instr = + { + .opcode = D3DSIO_DP2ADD, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .srcs[2].type = D3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, + }; + + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.mod = dst_mod, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .srcs[0].mod = src_mod, + .src_count = 1, + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) + { + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { + .type = D3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = i, + }; + + if (ctx->profile->major_version > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + write_sm1_dst_register(buffer, ®); + for (x = 0; x < 4; ++x) + put_f32(buffer, ctx->constant_defs.values[i].f[x]); + } +} + +static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool output) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, usage_idx; + D3DDECLUSAGE usage; + bool ret; + + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) + { + usage = 0; + usage_idx = 0; + } + else + { + ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); + assert(ret); + reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; + reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; + } + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + token = (1u << 31); + token |= usage << D3DSP_DCL_USAGE_SHIFT; + token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; + put_u32(buffer, token); + + reg.writemask = (1 << var->data_type->dimx) - 1; + write_sm1_dst_register(buffer, ®); +} + +static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + bool write_in = false, write_out = false; + struct hlsl_ir_var *var; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) + write_in = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) + write_in = write_out = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) + write_in = true; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (write_in && var->is_input_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, false); + if (write_out && var->is_output_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, true); + } +} + +static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_CONST, + .srcs[0].reg = constant->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(constant->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) +{ + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + unsigned int i; + + for (i = 0; i < instr->data_type->dimx; ++i) + { + struct hlsl_reg src = arg1->reg, dst = instr->reg; + + src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); + dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); + write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); + } +} + +static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + struct hlsl_ir_node *arg2 = expr->operands[1].node; + struct hlsl_ir_node *arg3 = expr->operands[2].node; + + assert(instr->reg.allocated); + + if (instr->data_type->base_type != HLSL_TYPE_FLOAT) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + return; + } + + switch (expr->op) + { + case HLSL_OP1_ABS: + write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_EXP2: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); + break; + + case HLSL_OP1_LOG2: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); + break; + + case HLSL_OP1_NEG: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + break; + + case HLSL_OP1_SAT: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + break; + + case HLSL_OP1_RCP: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); + break; + + case HLSL_OP1_RSQ: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); + break; + + case HLSL_OP2_ADD: + write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MAX: + write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MIN: + write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MUL: + write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP1_FRACT: + write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); + break; + + case HLSL_OP2_DOT: + switch (arg1->data_type->dimx) + { + case 4: + write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case 3: + write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_OP3_DP2ADD: + write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "SM1 "%s" expression.", debug_hlsl_expr_op(expr->op)); + break; + } +} + +static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_load *load = hlsl_ir_load(instr); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + + if (load->src.var->is_uniform) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_CONST; + } + else if (load->src.var->is_input_semantic) + { + if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, + false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_INPUT; + sm1_instr.srcs[0].reg = reg.id; + } + else + sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); + } + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_store *store = hlsl_ir_store(instr); + const struct hlsl_ir_node *rhs = store->rhs.node; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = reg.id, + .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = rhs->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), + .src_count = 1, + }; + + if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) + { + FIXME("Matrix writemasks need to be lowered.\n"); + return; + } + + if (store->lhs.var->is_output_semantic) + { + if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, + true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + { + assert(reg.allocated); + sm1_instr.dst.type = D3DSPR_OUTPUT; + sm1_instr.dst.reg = reg.id; + } + else + sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; + } + else + assert(reg.allocated); + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + const struct hlsl_ir_node *val = swizzle->val.node; + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = val->reg.id, + .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), + swizzle->swizzle, instr->data_type->dimx), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(val->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_function_decl *entry_func) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); + continue; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy."); + break; + } + + assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + write_sm1_constant(ctx, buffer, instr); + break; + + case HLSL_IR_EXPR: + write_sm1_expr(ctx, buffer, instr); + break; + + case HLSL_IR_LOAD: + write_sm1_load(ctx, buffer, instr); + break; + + case HLSL_IR_STORE: + write_sm1_store(ctx, buffer, instr); + break; + + case HLSL_IR_SWIZZLE: + write_sm1_swizzle(ctx, buffer, instr); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } +} + +int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + int ret; + + put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + + write_sm1_uniforms(ctx, &buffer, entry_func); + + write_sm1_constant_defs(ctx, &buffer); + write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_instructions(ctx, &buffer, entry_func); + + put_u32(&buffer, D3DSIO_END); + + if (!(ret = buffer.status)) + { + out->code = buffer.data; + out->size = buffer.size; + } + return ret; +} diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c deleted file mode 100644 index 0afeac68..00000000 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ /dev/null @@ -1,990 +0,0 @@ -/* - * HLSL code generation for DXBC shader models 1-3 - * - * Copyright 2019-2020 Zebediah Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "hlsl.h" -#include <stdio.h> - -bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) -{ - unsigned int i; - - static const struct - { - const char *semantic; - bool output; - enum vkd3d_shader_type shader_type; - unsigned int major_version; - D3DSHADER_PARAM_REGISTER_TYPE type; - DWORD offset; - } - register_table[] = - { - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type - && ctx->profile->major_version == register_table[i].major_version) - { - *type = register_table[i].type; - if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) - *reg = register_table[i].offset; - else - *reg = semantic->index; - return true; - } - } - - return false; -} - -bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) -{ - static const struct - { - const char *name; - D3DDECLUSAGE usage; - } - semantics[] = - { - {"binormal", D3DDECLUSAGE_BINORMAL}, - {"blendindices", D3DDECLUSAGE_BLENDINDICES}, - {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, - {"color", D3DDECLUSAGE_COLOR}, - {"depth", D3DDECLUSAGE_DEPTH}, - {"fog", D3DDECLUSAGE_FOG}, - {"normal", D3DDECLUSAGE_NORMAL}, - {"position", D3DDECLUSAGE_POSITION}, - {"positiont", D3DDECLUSAGE_POSITIONT}, - {"psize", D3DDECLUSAGE_PSIZE}, - {"sample", D3DDECLUSAGE_SAMPLE}, - {"sv_depth", D3DDECLUSAGE_DEPTH}, - {"sv_position", D3DDECLUSAGE_POSITION}, - {"sv_target", D3DDECLUSAGE_COLOR}, - {"tangent", D3DDECLUSAGE_TANGENT}, - {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, - {"texcoord", D3DDECLUSAGE_TEXCOORD}, - }; - - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { - if (!ascii_strcasecmp(semantic->name, semantics[i].name)) - { - *usage = semantics[i].usage; - *usage_idx = semantic->index; - return true; - } - } - - return false; -} - -static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) -{ - if (type == VKD3D_SHADER_TYPE_VERTEX) - return D3DVS_VERSION(major, minor); - else - return D3DPS_VERSION(major, minor); -} - -static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) -{ - switch (type->class) - { - case HLSL_CLASS_ARRAY: - return sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else - return D3DXPC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3DXPC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3DXPC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3DXPC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3DXPC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->class); - vkd3d_unreachable(); - } -} - -static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3DXPT_FLOAT; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3DXPT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_SAMPLER; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_STRING: - return D3DXPT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_TEXTURE; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_VERTEXSHADER: - return D3DXPT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3DXPT_VOID; - default: - vkd3d_unreachable(); - } -} - -static const struct hlsl_type *get_array_type(const struct hlsl_type *type) -{ - if (type->class == HLSL_CLASS_ARRAY) - return get_array_type(type->e.array.type); - return type; -} - -static unsigned int get_array_size(const struct hlsl_type *type) -{ - if (type->class == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; - return 1; -} - -static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -{ - const struct hlsl_type *array_type = get_array_type(type); - unsigned int array_size = get_array_size(type); - unsigned int field_count = 0; - size_t fields_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; - - if (array_type->class == HLSL_CLASS_STRUCT) - { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm1_type(buffer, field->type, ctab_start); - } - - fields_offset = bytecode_align(buffer) - ctab_start; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset - ctab_start); - put_u32(buffer, field->type->bytecode_offset - ctab_start); - } - } - - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -} - -static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) -{ - struct hlsl_ir_var *var; - - list_remove(&to_sort->extern_entry); - - LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) - { - if (strcmp(to_sort->name, var->name) < 0) - { - list_add_before(&var->extern_entry, &to_sort->extern_entry); - return; - } - } - - list_add_tail(sorted, &to_sort->extern_entry); -} - -static void sm1_sort_externs(struct hlsl_ctx *ctx) -{ - struct list sorted = LIST_INIT(sorted); - struct hlsl_ir_var *var, *next; - - LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform) - sm1_sort_extern(&sorted, var); - } - list_move_tail(&ctx->extern_vars, &sorted); -} - -static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - struct hlsl_ir_function_decl *entry_func) -{ - size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; - struct hlsl_ir_var *var; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - ++uniform_count; - - if (var->is_param && var->is_uniform) - { - struct vkd3d_string_buffer *name; - - if (!(name = hlsl_get_string_buffer(ctx))) - { - buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } - vkd3d_string_buffer_printf(name, "$%s", var->name); - vkd3d_free((char *)var->name); - var->name = hlsl_strdup(ctx, name->buffer); - hlsl_release_string_buffer(ctx, name); - } - } - } - - sm1_sort_externs(ctx); - - size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - - ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); - creator_offset = put_u32(buffer, 0); - put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_u32(buffer, uniform_count); - put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ - put_u32(buffer, 0); /* FIXME: flags */ - put_u32(buffer, 0); /* FIXME: target string */ - - vars_start = bytecode_align(buffer); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - put_u32(buffer, 0); /* name */ - if (var->data_type->class == HLSL_CLASS_OBJECT - && (var->data_type->base_type == HLSL_TYPE_SAMPLER - || var->data_type->base_type == HLSL_TYPE_TEXTURE)) - { - assert(regset == HLSL_REGSET_SAMPLERS); - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); - put_u32(buffer, 1); - } - else - { - assert(regset == HLSL_REGSET_NUMERIC); - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); - put_u32(buffer, var->data_type->reg_size[regset] / 4); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ - } - } - - uniform_count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); - size_t name_offset; - - name_offset = put_string(buffer, var->name); - set_u32(buffer, var_offset, name_offset - ctab_start); - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); - ++uniform_count; - } - } - - offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(buffer, creator_offset, offset - ctab_start); - - ctab_end = bytecode_align(buffer); - set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); -} - -static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) -{ - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -} - -struct sm1_instruction -{ - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; - - struct sm1_dst_register - { - D3DSHADER_PARAM_REGISTER_TYPE type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; - unsigned int writemask; - uint32_t reg; - } dst; - - struct sm1_src_register - { - D3DSHADER_PARAM_REGISTER_TYPE type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; - unsigned int swizzle; - uint32_t reg; - } srcs[3]; - unsigned int src_count; - - unsigned int has_dst; -}; - -static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) -{ - assert(reg->writemask); - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); -} - -static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - const struct sm1_src_register *reg) -{ - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); -} - -static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct sm1_instruction *instr) -{ - uint32_t token = instr->opcode; - unsigned int i; - - if (ctx->profile->major_version > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - if (instr->has_dst) - write_sm1_dst_register(buffer, &instr->dst); - - for (i = 0; i < instr->src_count; ++i) - write_sm1_src_register(buffer, &instr->srcs[i]); -}; - -static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) -{ - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); -} - -static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, - const struct hlsl_reg *src3) -{ - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .srcs[2].type = D3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, - .src_count = 1, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - unsigned int i, x; - - for (i = 0; i < ctx->constant_defs.count; ++i) - { - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { - .type = D3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = i, - }; - - if (ctx->profile->major_version > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - write_sm1_dst_register(buffer, ®); - for (x = 0; x < 4; ++x) - put_f32(buffer, ctx->constant_defs.values[i].f[x]); - } -} - -static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool output) -{ - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; - - if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) - { - usage = 0; - usage_idx = 0; - } - else - { - ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); - assert(ret); - reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; - reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; - } - - token = D3DSIO_DCL; - if (ctx->profile->major_version > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - token = (1u << 31); - token |= usage << D3DSP_DCL_USAGE_SHIFT; - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - - reg.writemask = (1 << var->data_type->dimx) - 1; - write_sm1_dst_register(buffer, ®); -} - -static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - bool write_in = false, write_out = false; - struct hlsl_ir_var *var; - - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) - write_in = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) - write_in = write_out = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) - write_in = true; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (write_in && var->is_input_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, false); - if (write_out && var->is_output_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, true); - } -} - -static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, - }; - - assert(instr->reg.allocated); - assert(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - unsigned int i; - - for (i = 0; i < instr->data_type->dimx; ++i) - { - struct hlsl_reg src = arg1->reg, dst = instr->reg; - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); - write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); - } -} - -static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; - - assert(instr->reg.allocated); - - if (instr->data_type->base_type != HLSL_TYPE_FLOAT) - { - /* These need to be lowered. */ - hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); - return; - } - - switch (expr->op) - { - case HLSL_OP1_ABS: - write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_LOG2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); - break; - - case HLSL_OP1_NEG: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); - break; - - case HLSL_OP1_SAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); - break; - - case HLSL_OP1_RCP: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); - break; - - case HLSL_OP2_ADD: - write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: - write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: - write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MUL: - write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: - write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_OP3_DP2ADD: - write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "SM1 "%s" expression.", debug_hlsl_expr_op(expr->op)); - break; - } -} - -static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_load *load = hlsl_ir_load(instr); - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, - }; - - assert(instr->reg.allocated); - - if (load->src.var->is_uniform) - { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) - { - if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, - false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - else - sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); - } - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_store *store = hlsl_ir_store(instr); - const struct hlsl_ir_node *rhs = store->rhs.node; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, - }; - - if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) - { - FIXME("Matrix writemasks need to be lowered.\n"); - return; - } - - if (store->lhs.var->is_output_semantic) - { - if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, - true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { - assert(reg.allocated); - sm1_instr.dst.type = D3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else - sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; - } - else - assert(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), - .src_count = 1, - }; - - assert(instr->reg.allocated); - assert(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_function_decl *entry_func) -{ - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->class == HLSL_CLASS_MATRIX) - { - /* These need to be lowered. */ - hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); - continue; - } - else if (instr->data_type->class == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); - break; - } - - assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - write_sm1_constant(ctx, buffer, instr); - break; - - case HLSL_IR_EXPR: - write_sm1_expr(ctx, buffer, instr); - break; - - case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); - break; - - case HLSL_IR_STORE: - write_sm1_store(ctx, buffer, instr); - break; - - case HLSL_IR_SWIZZLE: - write_sm1_swizzle(ctx, buffer, instr); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} - -int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -{ - struct vkd3d_bytecode_buffer buffer = {0}; - int ret; - - put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - - write_sm1_uniforms(ctx, &buffer, entry_func); - - write_sm1_constant_defs(ctx, &buffer); - write_sm1_semantic_dcls(ctx, &buffer); - write_sm1_instructions(ctx, &buffer, entry_func); - - put_u32(&buffer, D3DSIO_END); - - if (!(ret = buffer.status)) - { - out->code = buffer.data; - out->size = buffer.size; - } - return ret; -}
While properly distinguishing DXBC and TPF seems a good idea, I am not a fan of having very long source files which mix different logical pieces of code. I find them difficult to read and to understand. So I'd keep separate the parsing and emitting code, both for SM4 and SM1.
(also, I find a bit funny the way we handle copyright lines in headers: it seems they are added by the first person checking in a file and never updated any more; not that it's a big problem for me, just funny)
I wouldn't call either d3dbc.c or tpf.c very long, but I suppose that's a matter of opinion. I imagine it doesn't help that I find the practice of spreading source code over a million tiny files to be one of the most irritating things a codebase can do...
In any case, while it's perhaps not quite true at the moment, parsing and serialising the different bytecode formats should be closely related and use the same constants, structures, and so on. There's currently some HLSL-specific code intermixed that would more properly belong in hlsl.c/hlsl_codegen.c, but we can deal with that once we get to it.