-- v4: vkd3d: Import upstream commit 269747dbf3ee32bf23e6d1ab388d2a058ca90f9f. vkd3d: Import upstream release 1.8.
From: Alexandre Julliard julliard@winehq.org
--- dlls/d3dcompiler_43/tests/blob.c | 2 +- dlls/d3dcompiler_43/tests/hlsl_d3d9.c | 8 +- libs/vkd3d/AUTHORS | 2 + libs/vkd3d/Makefile.in | 6 +- libs/vkd3d/config.h | 4 +- libs/vkd3d/include/private/vkd3d_common.h | 1 + libs/vkd3d/include/private/vkd3d_debug.h | 2 +- libs/vkd3d/include/vkd3d.h | 1 + libs/vkd3d/include/vkd3d_shader.h | 1 + libs/vkd3d/libs/vkd3d-common/debug.c | 4 +- .../libs/vkd3d-shader/{trace.c => d3d_asm.c} | 25 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1131 +++- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 1773 +----- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 619 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 233 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 8 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2362 +++++--- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1821 ++++-- .../libs/vkd3d-shader/hlsl_constant_ops.c | 358 +- libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c | 980 --- libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c | 2531 -------- libs/vkd3d/libs/vkd3d-shader/ir.c | 1072 ++++ libs/vkd3d/libs/vkd3d-shader/preproc.l | 6 +- libs/vkd3d/libs/vkd3d-shader/sm4.h | 552 -- libs/vkd3d/libs/vkd3d-shader/spirv.c | 1323 ++--- libs/vkd3d/libs/vkd3d-shader/tpf.c | 5234 +++++++++++++++++ .../libs/vkd3d-shader/vkd3d_shader_main.c | 239 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 89 +- libs/vkd3d/libs/vkd3d/command.c | 160 +- libs/vkd3d/libs/vkd3d/device.c | 205 +- libs/vkd3d/libs/vkd3d/resource.c | 935 ++- libs/vkd3d/libs/vkd3d/state.c | 4 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 234 +- 33 files changed, 12569 insertions(+), 9356 deletions(-) rename libs/vkd3d/libs/vkd3d-shader/{trace.c => d3d_asm.c} (98%) delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c create mode 100644 libs/vkd3d/libs/vkd3d-shader/ir.c delete mode 100644 libs/vkd3d/libs/vkd3d-shader/sm4.h create mode 100644 libs/vkd3d/libs/vkd3d-shader/tpf.c
diff --git a/dlls/d3dcompiler_43/tests/blob.c b/dlls/d3dcompiler_43/tests/blob.c index 8b2ccc667b2..56432a9a81d 100644 --- a/dlls/d3dcompiler_43/tests/blob.c +++ b/dlls/d3dcompiler_43/tests/blob.c @@ -764,7 +764,7 @@ static void test_get_blob_part2(void) ok(hr == S_OK, "Got unexpected hr %#lx.\n", hr);
size = ID3D10Blob_GetBufferSize(blob); - todo_wine ok(size == 4735, "Got unexpected size %Iu.\n", size); + ok(size == 4735, "Got unexpected size %Iu.\n", size);
dword = ((DWORD*)ID3D10Blob_GetBufferPointer(blob)); ok(TAG_DXBC == *dword, "DXBC got %#lx, expected %#lx.\n", *dword, TAG_DXBC); diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c index 42fb3b97fc2..7f84c2c62e3 100644 --- a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c +++ b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c @@ -579,7 +579,7 @@ static void test_conditionals(void) release_readback(&rb); ID3D10Blob_Release(ps_code);
- todo_wine ps_code = compile_shader(ps_ternary_source, "ps_2_0", 0); + ps_code = compile_shader(ps_ternary_source, "ps_2_0", 0); if (ps_code) { draw_quad(device, ps_code); @@ -588,14 +588,14 @@ static void test_conditionals(void) for (i = 0; i < 320; i += 40) { v = get_readback_vec4(&rb, i, 0); - ok(compare_vec4(v, 0.5f, 0.25f, 0.5f, 0.75f, 0), + todo_wine ok(compare_vec4(v, 0.5f, 0.25f, 0.5f, 0.75f, 0), "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w); }
for (i = 360; i < 640; i += 40) { v = get_readback_vec4(&rb, i, 0); - ok(compare_vec4(v, 0.6f, 0.8f, 0.1f, 0.2f, 0), + todo_wine ok(compare_vec4(v, 0.6f, 0.8f, 0.1f, 0.2f, 0), "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w); }
@@ -1174,7 +1174,7 @@ static void test_samplers(void) draw_quad(test_context.device, ps_code);
v = get_color_vec4(test_context.device, 0, 0); - todo_wine ok(compare_vec4(&v, 0.25f, 0.0f, 0.25f, 0.0f, 128), + ok(compare_vec4(&v, 0.25f, 0.0f, 0.25f, 0.0f, 128), "Test %u: Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", i, v.x, v.y, v.z, v.w);
ID3D10Blob_Release(ps_code); diff --git a/libs/vkd3d/AUTHORS b/libs/vkd3d/AUTHORS index 622aecbb078..6d4f0e0617e 100644 --- a/libs/vkd3d/AUTHORS +++ b/libs/vkd3d/AUTHORS @@ -8,6 +8,7 @@ Chip Davis Conor McCarthy David Gow Derek Lesho +Ethan Lee Fabian Maurer Francisco Casas Francois Gouget @@ -16,6 +17,7 @@ Hans-Kristian Arntzen Henri Verbeet Isabella Bosia Jactry Zeng +Jan Sikorski Joshua Ashton Józef Kucia Martin Storsjö diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 0ed4e27ad83..1ba0e9f71e1 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -14,6 +14,7 @@ SOURCES = \ libs/vkd3d-common/memory.c \ libs/vkd3d-common/utf8.c \ libs/vkd3d-shader/checksum.c \ + libs/vkd3d-shader/d3d_asm.c \ libs/vkd3d-shader/d3dbc.c \ libs/vkd3d-shader/dxbc.c \ libs/vkd3d-shader/glsl.c \ @@ -22,12 +23,11 @@ SOURCES = \ libs/vkd3d-shader/hlsl.y \ libs/vkd3d-shader/hlsl_codegen.c \ libs/vkd3d-shader/hlsl_constant_ops.c \ - libs/vkd3d-shader/hlsl_sm1.c \ - libs/vkd3d-shader/hlsl_sm4.c \ + libs/vkd3d-shader/ir.c \ libs/vkd3d-shader/preproc.l \ libs/vkd3d-shader/preproc.y \ libs/vkd3d-shader/spirv.c \ - libs/vkd3d-shader/trace.c \ + libs/vkd3d-shader/tpf.c \ libs/vkd3d-shader/vkd3d_shader_main.c \ libs/vkd3d/command.c \ libs/vkd3d/device.c \ diff --git a/libs/vkd3d/config.h b/libs/vkd3d/config.h index 6d2eeba235a..8c5aa958dde 100644 --- a/libs/vkd3d/config.h +++ b/libs/vkd3d/config.h @@ -1,5 +1,5 @@ #define PACKAGE_NAME "vkd3d" -#define PACKAGE_STRING "vkd3d 1.7" -#define PACKAGE_VERSION "1.7" +#define PACKAGE_STRING "vkd3d 1.8" +#define PACKAGE_VERSION "1.8" #define PATH_MAX 1024 #define SONAME_LIBVULKAN "vulkan-1.dll" diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 3cf0422596c..08dde1b2e7f 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -249,6 +249,7 @@ static inline LONG InterlockedDecrement(LONG volatile *x) # else # error 
"InterlockedDecrement() not implemented for this platform" # endif + #endif /* _WIN32 */
static inline void vkd3d_parse_version(const char *version, int *major, int *minor) diff --git a/libs/vkd3d/include/private/vkd3d_debug.h b/libs/vkd3d/include/private/vkd3d_debug.h index 4f6d43af12f..6708cad344f 100644 --- a/libs/vkd3d/include/private/vkd3d_debug.h +++ b/libs/vkd3d/include/private/vkd3d_debug.h @@ -91,7 +91,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size);
#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN)
-#define VKD3D_DEBUG_ENV_NAME(name) const char *vkd3d_dbg_env_name = name +#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name
static inline const char *debugstr_guid(const GUID *guid) { diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index ff2b15c51dc..72ed3ced671 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h @@ -76,6 +76,7 @@ enum vkd3d_api_version VKD3D_API_VERSION_1_5, VKD3D_API_VERSION_1_6, VKD3D_API_VERSION_1_7, + VKD3D_API_VERSION_1_8,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), }; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 859b8c79792..274241546ea 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -49,6 +49,7 @@ enum vkd3d_shader_api_version VKD3D_SHADER_API_VERSION_1_5, VKD3D_SHADER_API_VERSION_1_6, VKD3D_SHADER_API_VERSION_1_7, + VKD3D_SHADER_API_VERSION_1_8,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), }; diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index 499334a35f1..b363efbd360 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -40,9 +40,9 @@ #define VKD3D_DEBUG_BUFFER_COUNT 64 #define VKD3D_DEBUG_BUFFER_SIZE 512
-extern const char *vkd3d_dbg_env_name; +extern const char *const vkd3d_dbg_env_name;
-static const char *debug_level_names[] = +static const char *const debug_level_names[] = { /* VKD3D_DBG_LEVEL_NONE */ "none", /* VKD3D_DBG_LEVEL_ERR */ "err", diff --git a/libs/vkd3d/libs/vkd3d-shader/trace.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c similarity index 98% rename from libs/vkd3d/libs/vkd3d-shader/trace.c rename to libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 6cd2dcb270c..0a821b5c878 100644 --- a/libs/vkd3d/libs/vkd3d-shader/trace.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -109,6 +109,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_DEQ ] = "deq", [VKD3DSIH_DFMA ] = "dfma", [VKD3DSIH_DGE ] = "dge", + [VKD3DSIH_DISCARD ] = "discard", [VKD3DSIH_DIV ] = "div", [VKD3DSIH_DLT ] = "dlt", [VKD3DSIH_DMAX ] = "dmax", @@ -645,7 +646,7 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, break;
case VKD3D_SHADER_RESOURCE_TEXTURE_3D: - shader_addline(buffer, "_3d"); + shader_addline(buffer, "_volume"); break;
case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: @@ -660,8 +661,9 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) { if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) - shader_addline(buffer, "_resource_"); + shader_addline(buffer, "_resource");
+ shader_addline(buffer, "_"); shader_dump_resource_type(compiler, semantic->resource_type); if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) @@ -1505,9 +1507,9 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile { case VKD3DSIH_BREAKP: case VKD3DSIH_CONTINUEP: + case VKD3DSIH_DISCARD: case VKD3DSIH_IF: case VKD3DSIH_RETP: - case VKD3DSIH_TEXKILL: switch (ins->flags) { case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; @@ -1857,11 +1859,11 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, shader_addline(buffer, "\n"); }
-enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out) +enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out) { enum vkd3d_shader_compile_option_formatting_flags formatting; - struct vkd3d_shader_version *shader_version; struct vkd3d_d3d_asm_compiler compiler; enum vkd3d_result result = VKD3D_OK; struct vkd3d_string_buffer *buffer; @@ -1919,16 +1921,16 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, buffer = &compiler.buffer; vkd3d_string_buffer_init(buffer);
+ compiler.shader_version = *shader_version; shader_version = &compiler.shader_version; - *shader_version = parser->shader_version; vkd3d_string_buffer_printf(buffer, "%s%s_%u_%u%s\n", compiler.colours.version, shader_get_type_prefix(shader_version->type), shader_version->major, shader_version->minor, compiler.colours.reset);
indent = 0; - for (i = 0; i < parser->instructions.count; ++i) + for (i = 0; i < instructions->count; ++i) { - struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; + struct vkd3d_shader_instruction *ins = &instructions->elements[i];
switch (ins->handler_idx) { @@ -1981,12 +1983,13 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, return result; }
-void vkd3d_shader_trace(struct vkd3d_shader_parser *parser) +void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version) { const char *p, *q, *end; struct vkd3d_shader_code code;
- if (vkd3d_dxbc_binary_to_text(parser, NULL, &code) != VKD3D_OK) + if (vkd3d_dxbc_binary_to_text(instructions, shader_version, NULL, &code) != VKD3D_OK) return;
end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index ed81137d225..712613ac13b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1,4 +1,6 @@ /* + * d3dbc (Direct3D shader models 1-3 bytecode) support + * * Copyright 2002-2003 Jason Edmeades * Copyright 2002-2003 Raphael Junqueira * Copyright 2004 Christian Costa @@ -6,6 +8,7 @@ * Copyright 2006 Ivan Gyurdiev * Copyright 2007-2008 Stefan Dösinger for CodeWeavers * Copyright 2009, 2021 Henri Verbeet for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,7 +25,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */
-#include "vkd3d_shader_private.h" +#include "hlsl.h"
#define VKD3D_SM1_VS 0xfffeu #define VKD3D_SM1_PS 0xffffu @@ -207,7 +210,7 @@ struct vkd3d_sm1_opcode_info struct vkd3d_shader_sm1_parser { const struct vkd3d_sm1_opcode_info *opcode_table; - const uint32_t *start, *end; + const uint32_t *start, *end, *ptr; bool abort;
struct vkd3d_shader_parser p; @@ -462,6 +465,7 @@ static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader src->reg.idx[1].rel_addr = NULL; src->reg.idx[2].offset = ~0u; src->reg.idx[2].rel_addr = NULL; + src->reg.idx_count = 1; src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; } @@ -480,6 +484,7 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader dst->reg.idx[1].rel_addr = NULL; dst->reg.idx[2].offset = ~0u; dst->reg.idx[2].rel_addr = NULL; + dst->reg.idx_count = 1; dst->write_mask = (param & VKD3D_SM1_WRITEMASK_MASK) >> VKD3D_SM1_WRITEMASK_SHIFT; dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; @@ -661,6 +666,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const src_param->reg.idx[1].rel_addr = NULL; src_param->reg.idx[2].offset = ~0u; src_param->reg.idx[2].rel_addr = NULL; + src_param->reg.idx_count = 0; src_param->reg.immconst_type = type; memcpy(src_param->reg.u.immconst_uint, *ptr, count * sizeof(uint32_t)); src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; @@ -671,7 +677,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const
static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) { - const uint32_t **ptr = &sm1->p.ptr; + const uint32_t **ptr = &sm1->ptr; const char *comment; unsigned int size; size_t remaining; @@ -738,13 +744,12 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, } }
-static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) +static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); struct vkd3d_shader_src_param *src_params, *predicate; const struct vkd3d_sm1_opcode_info *opcode_info; struct vkd3d_shader_dst_param *dst_param; - const uint32_t **ptr = &parser->ptr; + const uint32_t **ptr = &sm1->ptr; uint32_t opcode_token; const uint32_t *p; bool predicated; @@ -758,11 +763,11 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru goto fail; }
- ++parser->location.line; + ++sm1->p.location.line; opcode_token = read_u32(ptr); if (!(opcode_info = shader_sm1_get_opcode_info(sm1, opcode_token & VKD3D_SM1_OPCODE_MASK))) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, "Invalid opcode %#x (token 0x%08x, shader version %u.%u).", opcode_token & VKD3D_SM1_OPCODE_MASK, opcode_token, sm1->p.shader_version.major, sm1->p.shader_version.minor); @@ -775,14 +780,14 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru ins->raw = false; ins->structured = false; predicated = !!(opcode_token & VKD3D_SM1_INSTRUCTION_PREDICATED); - ins->predicate = predicate = predicated ? shader_parser_get_src_params(parser, 1) : NULL; + ins->predicate = predicate = predicated ? shader_parser_get_src_params(&sm1->p, 1) : NULL; ins->dst_count = opcode_info->dst_count; - ins->dst = dst_param = shader_parser_get_dst_params(parser, ins->dst_count); + ins->dst = dst_param = shader_parser_get_dst_params(&sm1->p, ins->dst_count); ins->src_count = opcode_info->src_count; - ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); + ins->src = src_params = shader_parser_get_src_params(&sm1->p, ins->src_count); if ((!predicate && predicated) || (!src_params && ins->src_count) || (!dst_param && ins->dst_count)) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); goto fail; }
@@ -852,10 +857,9 @@ fail: *ptr = sm1->end; }
-static bool shader_sm1_is_end(struct vkd3d_shader_parser *parser) +static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1) { - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); - const uint32_t **ptr = &parser->ptr; + const uint32_t **ptr = &sm1->ptr;
shader_sm1_read_comment(sm1);
@@ -938,7 +942,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, shader_desc = &sm1->p.shader_desc; shader_desc->byte_code = code; shader_desc->byte_code_size = code_size; - sm1->p.ptr = sm1->start; + sm1->ptr = sm1->start;
return VKD3D_OK; } @@ -965,7 +969,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi }
instructions = &sm1->p.instructions; - while (!shader_sm1_is_end(&sm1->p)) + while (!shader_sm1_is_end(sm1)) { if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) { @@ -975,7 +979,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi return VKD3D_ERROR_OUT_OF_MEMORY; } ins = &instructions->elements[instructions->count]; - shader_sm1_read_instruction(&sm1->p, ins); + shader_sm1_read_instruction(sm1, ins);
if (ins->handler_idx == VKD3DSIH_INVALID) { @@ -988,5 +992,1094 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi
*parser = &sm1->p;
- return VKD3D_OK; + return sm1->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + +bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) +{ + unsigned int i; + + static const struct + { + const char *semantic; + bool output; + enum vkd3d_shader_type shader_type; + unsigned int major_version; + D3DSHADER_PARAM_REGISTER_TYPE type; + DWORD offset; + } + register_table[] = + { + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, + + 
{"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { + if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type + && ctx->profile->major_version == register_table[i].major_version) + { + *type = register_table[i].type; + if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) + *reg = register_table[i].offset; + else + *reg = semantic->index; + return true; + } + } + + return false; +} + +bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) +{ + static const struct + { + const char *name; + D3DDECLUSAGE usage; + } + semantics[] = + { + {"binormal", D3DDECLUSAGE_BINORMAL}, + {"blendindices", D3DDECLUSAGE_BLENDINDICES}, + {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, + {"color", D3DDECLUSAGE_COLOR}, + {"depth", D3DDECLUSAGE_DEPTH}, + {"fog", D3DDECLUSAGE_FOG}, + {"normal", D3DDECLUSAGE_NORMAL}, + {"position", D3DDECLUSAGE_POSITION}, + {"positiont", D3DDECLUSAGE_POSITIONT}, + {"psize", D3DDECLUSAGE_PSIZE}, + {"sample", D3DDECLUSAGE_SAMPLE}, + {"sv_depth", D3DDECLUSAGE_DEPTH}, + {"sv_position", D3DDECLUSAGE_POSITION}, + {"sv_target", D3DDECLUSAGE_COLOR}, + {"tangent", D3DDECLUSAGE_TANGENT}, + {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, + {"texcoord", D3DDECLUSAGE_TEXCOORD}, + }; + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { + if (!ascii_strcasecmp(semantic->name, semantics[i].name)) + { + 
*usage = semantics[i].usage; + *usage_idx = semantic->index; + return true; + } + } + + return false; +} + +static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) +{ + if (type == VKD3D_SHADER_TYPE_VERTEX) + return D3DVS_VERSION(major, minor); + else + return D3DPS_VERSION(major, minor); +} + +static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_ARRAY: + return sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3DXPC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->class); + vkd3d_unreachable(); + } +} + +static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3DXPT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_STRING: + return D3DXPT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + 
case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_VERTEXSHADER: + return D3DXPT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3DXPT_VOID; + default: + vkd3d_unreachable(); + } +} + +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +{ + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + unsigned int array_size = hlsl_get_multiarray_size(type); + unsigned int field_count = 0; + size_t fields_offset = 0; + size_t i; + + if (type->bytecode_offset) + return; + + if (array_type->class == HLSL_CLASS_STRUCT) + { + field_count = array_type->e.record.field_count; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm1_type(buffer, field->type, ctab_start); + } + + fields_offset = bytecode_align(buffer) - ctab_start; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); + } + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); +} + +static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (strcmp(to_sort->name, var->name) < 0) + { + list_add_before(&var->extern_entry, 
&to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm1_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform) + sm1_sort_extern(&sorted, var); + } + list_move_tail(&ctx->extern_vars, &sorted); +} + +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + struct hlsl_ir_function_decl *entry_func) +{ + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + unsigned int uniform_count = 0; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated) + continue; + + ++uniform_count; + + if (var->is_param && var->is_uniform) + { + struct vkd3d_string_buffer *name; + + if (!(name = hlsl_get_string_buffer(ctx))) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + vkd3d_string_buffer_printf(name, "$%s", var->name); + vkd3d_free((char *)var->name); + var->name = hlsl_strdup(ctx, name->buffer); + hlsl_release_string_buffer(ctx, name); + } + } + } + + sm1_sort_externs(ctx); + + size_offset = put_u32(buffer, 0); + ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); + + ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); + creator_offset = put_u32(buffer, 0); + put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); + put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ + put_u32(buffer, 0); /* FIXME: flags */ + put_u32(buffer, 0); /* FIXME: target string */ + + vars_start = bytecode_align(buffer); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct 
hlsl_ir_var, extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated) + continue; + + put_u32(buffer, 0); /* name */ + if (r == HLSL_REGSET_NUMERIC) + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); + put_u32(buffer, var->data_type->reg_size[r] / 4); + } + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); + put_u32(buffer, var->regs[r].bind_count); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ + } + } + + uniform_count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + size_t var_offset, name_offset; + + if (var->semantic.name || !var->regs[r].allocated) + continue; + + var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + + name_offset = put_string(buffer, var->name); + set_u32(buffer, var_offset, name_offset - ctab_start); + + write_sm1_type(buffer, var->data_type, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + ++uniform_count; + } + } + + offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(buffer, creator_offset, offset - ctab_start); + + ctab_end = bytecode_align(buffer); + set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); +} + +static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +{ + return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +} + +struct sm1_instruction +{ + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + + struct sm1_dst_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_DSTMOD_TYPE mod; + unsigned int writemask; + uint32_t reg; + } dst; + + struct sm1_src_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + 
D3DSHADER_PARAM_SRCMOD_TYPE mod; + unsigned int swizzle; + uint32_t reg; + } srcs[3]; + unsigned int src_count; + + unsigned int has_dst; +}; + +static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) +{ + assert(reg->writemask); + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); +} + +static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, + const struct sm1_src_register *reg) +{ + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); +} + +static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct sm1_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int i; + + if (ctx->profile->major_version > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + if (instr->has_dst) + write_sm1_dst_register(buffer, &instr->dst); + + for (i = 0; i < instr->src_count; ++i) + write_sm1_src_register(buffer, &instr->srcs[i]); +}; + +static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) +{ + src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); +} + +static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, + const struct hlsl_reg *src3) +{ + struct sm1_instruction instr = + { + .opcode = D3DSIO_DP2ADD, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .srcs[2].type = D3DSPR_TEMP, + .srcs[2].swizzle = 
hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, + }; + + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) +{ + struct sm1_instruction 
instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.mod = dst_mod, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .srcs[0].mod = src_mod, + .src_count = 1, + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) + { + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { + .type = D3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = i, + }; + + if (ctx->profile->major_version > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + write_sm1_dst_register(buffer, &reg); + for (x = 0; x < 4; ++x) + put_f32(buffer, ctx->constant_defs.values[i].f[x]); + } +} + +static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool output) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, usage_idx; + D3DDECLUSAGE usage; + bool ret; + + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg)) + { + usage = 0; + usage_idx = 0; + } + else + { + ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); + assert(ret); + reg.type = output ? 
D3DSPR_OUTPUT : D3DSPR_INPUT; + reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; + } + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + token = (1u << 31); + token |= usage << D3DSP_DCL_USAGE_SHIFT; + token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; + put_u32(buffer, token); + + reg.writemask = (1 << var->data_type->dimx) - 1; + write_sm1_dst_register(buffer, &reg); +} + +static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + bool write_in = false, write_out = false; + struct hlsl_ir_var *var; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) + write_in = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) + write_in = write_out = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) + write_in = true; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (write_in && var->is_input_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, false); + if (write_out && var->is_output_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, true); + } +} + +static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D; + break; + + case HLSL_SAMPLER_DIM_2D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; + break; + + case HLSL_SAMPLER_DIM_CUBE: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; + break; + + case HLSL_SAMPLER_DIM_3D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; + break; + + default: + 
vkd3d_unreachable(); + break; + } + + token = (1u << 31); + token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; + put_u32(buffer, token); + + reg.type = D3DSPR_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, &reg); +} + +static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + enum hlsl_sampler_dim sampler_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + + if (ctx->profile->major_version < 2) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + + count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; + + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; + assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); + } + } + } +} + +static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_CONST, + .srcs[0].reg = constant->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(constant->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) +{ 
+ struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + unsigned int i; + + for (i = 0; i < instr->data_type->dimx; ++i) + { + struct hlsl_reg src = arg1->reg, dst = instr->reg; + + src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); + dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); + write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); + } +} + +static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + struct hlsl_ir_node *arg2 = expr->operands[1].node; + struct hlsl_ir_node *arg3 = expr->operands[2].node; + + assert(instr->reg.allocated); + + if (instr->data_type->base_type != HLSL_TYPE_FLOAT) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + return; + } + + switch (expr->op) + { + case HLSL_OP1_ABS: + write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSX: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSY: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_EXP2: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); + break; + + case HLSL_OP1_LOG2: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); + break; + + case HLSL_OP1_NEG: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + break; + + case HLSL_OP1_SAT: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + break; + + case HLSL_OP1_RCP: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); + break; + + case HLSL_OP1_RSQ: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); + break; + + case HLSL_OP2_ADD: 
+ write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MAX: + write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MIN: + write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MUL: + write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP1_FRACT: + write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); + break; + + case HLSL_OP2_DOT: + switch (arg1->data_type->dimx) + { + case 4: + write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case 3: + write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_OP3_DP2ADD: + write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); + break; + } +} + +static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_load *load = hlsl_ir_load(instr); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + + if (load->src.var->is_uniform) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_CONST; + } + else if (load->src.var->is_input_semantic) + { + if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, + false, &sm1_instr.srcs[0].type, 
&sm1_instr.srcs[0].reg)) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_INPUT; + sm1_instr.srcs[0].reg = reg.id; + } + else + sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); + } + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; + unsigned int sampler_offset, reg_id; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); + reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; + + sm1_instr = (struct sm1_instruction) + { + .opcode = D3DSIO_TEX, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .srcs[1].type = D3DSPR_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .src_count = 2, + }; + + assert(instr->reg.allocated); + + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_store *store = hlsl_ir_store(instr); + const struct hlsl_ir_node *rhs = store->rhs.node; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = reg.id, + .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = 
rhs->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), + .src_count = 1, + }; + + if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) + { + FIXME("Matrix writemasks need to be lowered.\n"); + return; + } + + if (store->lhs.var->is_output_semantic) + { + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) + { + sm1_instr.dst.type = D3DSPR_TEMP; + sm1_instr.dst.reg = 0; + } + else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, + true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + { + assert(reg.allocated); + sm1_instr.dst.type = D3DSPR_OUTPUT; + sm1_instr.dst.reg = reg.id; + } + else + sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; + } + else + assert(reg.allocated); + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + const struct hlsl_ir_node *val = swizzle->val.node; + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = val->reg.id, + .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), + swizzle->swizzle, instr->data_type->dimx), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(val->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_function_decl *entry_func) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, 
struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); + continue; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy."); + break; + } + + assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + write_sm1_constant(ctx, buffer, instr); + break; + + case HLSL_IR_EXPR: + write_sm1_expr(ctx, buffer, instr); + break; + + case HLSL_IR_LOAD: + write_sm1_load(ctx, buffer, instr); + break; + + case HLSL_IR_RESOURCE_LOAD: + write_sm1_resource_load(ctx, buffer, instr); + break; + + case HLSL_IR_STORE: + write_sm1_store(ctx, buffer, instr); + break; + + case HLSL_IR_SWIZZLE: + write_sm1_swizzle(ctx, buffer, instr); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } +} + +int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + int ret; + + put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + + write_sm1_uniforms(ctx, &buffer, entry_func); + + write_sm1_constant_defs(ctx, &buffer); + write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_sampler_dcls(ctx, &buffer); + write_sm1_instructions(ctx, &buffer, entry_func); + + put_u32(&buffer, D3DSIO_END); + + if (!(ret = buffer.status)) + { + out->code = buffer.data; + out->size = buffer.size; + } + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index d99ea2e36b6..3e3f06faeb5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -19,1680 +19,74 @@ */
#include "vkd3d_shader_private.h" -#include "sm4.h"
-#define SM4_MAX_SRC_COUNT 6 -#define SM4_MAX_DST_COUNT 2 - -STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); - -void dxbc_writer_init(struct dxbc_writer *dxbc) -{ - memset(dxbc, 0, sizeof(*dxbc)); -} - -void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) -{ - struct vkd3d_shader_dxbc_section_desc *section; - - assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); - - section = &dxbc->sections[dxbc->section_count++]; - section->tag = tag; - section->data.code = data; - section->data.size = size; -} - -int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, - struct vkd3d_shader_code *dxbc, char **messages) -{ - size_t size_position, offsets_position, checksum_position, i; - struct vkd3d_bytecode_buffer buffer = {0}; - uint32_t checksum[4]; - - TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); - - if (messages) - *messages = NULL; - - put_u32(&buffer, TAG_DXBC); - - checksum_position = bytecode_get_size(&buffer); - for (i = 0; i < 4; ++i) - put_u32(&buffer, 0); - - put_u32(&buffer, 1); /* version */ - size_position = put_u32(&buffer, 0); - put_u32(&buffer, section_count); - - offsets_position = bytecode_get_size(&buffer); - for (i = 0; i < section_count; ++i) - put_u32(&buffer, 0); - - for (i = 0; i < section_count; ++i) - { - set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_get_size(&buffer)); - put_u32(&buffer, sections[i].tag); - put_u32(&buffer, sections[i].data.size); - bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); - } - set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - - vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); - for (i = 0; i < 4; ++i) - set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); - - if (!buffer.status) - { - dxbc->code = buffer.data; - dxbc->size = buffer.size; - } - return 
buffer.status; -} - -int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) -{ - return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); -} - -struct vkd3d_shader_src_param_entry -{ - struct list entry; - struct vkd3d_shader_src_param param; -}; - -struct vkd3d_shader_sm4_parser -{ - const uint32_t *start, *end; - - unsigned int output_map[MAX_REG_OUTPUT]; - - struct vkd3d_shader_parser p; -}; - -struct vkd3d_sm4_opcode_info -{ - enum vkd3d_sm4_opcode opcode; - enum vkd3d_shader_opcode handler_idx; - char dst_info[SM4_MAX_DST_COUNT]; - char src_info[SM4_MAX_SRC_COUNT]; - void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); -}; - -static const enum vkd3d_primitive_type output_primitive_type_table[] = -{ - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, -}; - -static const enum vkd3d_primitive_type input_primitive_type_table[] = -{ - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, - /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, - /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, - /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, -}; - -static const enum vkd3d_shader_resource_type resource_type_table[] = -{ - /* 0 */ VKD3D_SHADER_RESOURCE_NONE, - /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, - /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, - /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, - /* 
VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, - /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, - /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, - /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, - /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, - /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -}; - -static const enum vkd3d_data_type data_type_table[] = -{ - /* 0 */ VKD3D_DATA_FLOAT, - /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, - /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, - /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, - /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, - /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, - /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, - /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, - /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, - /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, -}; - -static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) -{ - return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); -} - -static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) -{ - const struct vkd3d_shader_version *version = &sm4->p.shader_version; - - return version->major >= 5 && version->minor >= 1; -} - -static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); -static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param 
*dst_param); - -static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, - const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) -{ - *register_space = 0; - - if (!shader_is_sm_5_1(priv)) - return true; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - - *register_space = *(*ptr)++; - return true; -} - -static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, - (struct vkd3d_shader_src_param *)&ins->src[0]); - ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? - VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; -} - -static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_immediate_constant_buffer *icb; - enum vkd3d_sm4_shader_data_type type; - unsigned int icb_size; - - type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; - if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) - { - FIXME("Ignoring shader data type %#x.\n", type); - ins->handler_idx = VKD3DSIH_NOP; - return; - } - - ++tokens; - icb_size = token_count - 1; - if (icb_size % 4) - { - FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - - if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) - { - ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); - vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - 
icb->vec4_count = icb_size / 4; - memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); - shader_instruction_array_add_icb(&priv->p.instructions, icb); - ins->declaration.icb = icb; -} - -static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) -{ - range->first = reg->idx[1].offset; - range->last = reg->idx[shader_is_sm_5_1(sm4) ? 2 : 1].offset; - if (range->last < range->first) - { - FIXME("Invalid register range [%u:%u].\n", range->first, range->last); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, - "Last register %u must not be less than first register %u in range.\n", range->last, range->first); - } -} - -static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; - enum vkd3d_sm4_resource_type resource_type; - const uint32_t *end = &tokens[token_count]; - enum vkd3d_sm4_data_type data_type; - enum vkd3d_data_type reg_data_type; - DWORD components; - unsigned int i; - - resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; - if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) - { - FIXME("Unhandled resource type %#x.\n", resource_type); - semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; - } - else - { - semantic->resource_type = resource_type_table[resource_type]; - } - - if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS - || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) - { - semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - - reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; - shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); - - components = *tokens++; - for (i = 0; i < VKD3D_VEC4_SIZE; i++) - { - data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); - - if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) - { - FIXME("Unhandled data type %#x.\n", data_type); - semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; - } - else - { - semantic->resource_data_type[i] = data_type_table[data_type]; - } - } - - if (reg_data_type == VKD3D_DATA_UAV) - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - - shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); -} - -static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); - if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) - ins->flags |= VKD3DSI_INDEXED_DYNAMIC; - - ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; - ins->declaration.cb.range.space = 0; - - if (shader_is_sm_5_1(priv)) - { - if (tokens >= end) - { - FIXME("Invalid ptr %p >= end %p.\n", tokens, end); - return; - } - - ins->declaration.cb.size = *tokens++; - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); - } -} - -static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - 
const uint32_t *end = &tokens[token_count]; - - ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; - if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) - FIXME("Unhandled sampler mode %#x.\n", ins->flags); - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); -} - -static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, - &ins->declaration.index_range.dst); - ins->declaration.index_range.register_count = *tokens; -} - -static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - enum vkd3d_sm4_output_primitive_type primitive_type; - - primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) - ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; - else - ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled output primitive type %#x.\n", primitive_type); -} - -static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - enum vkd3d_sm4_input_primitive_type primitive_type; - - primitive_type = 
(opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) - { - ins->declaration.primitive_type.type = VKD3D_PT_PATCH; - ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; - } - else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) - { - ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; - } - else - { - ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; - } - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled input primitive type %#x.\n", primitive_type); -} - -static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.count = *tokens; -} - -static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -} - -static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.register_semantic.reg); - ins->declaration.register_semantic.sysval_semantic = *tokens; -} - -static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & 
VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -} - -static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.register_semantic.reg); - ins->declaration.register_semantic.sysval_semantic = *tokens; -} - -static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.indexable_temp.register_idx = *tokens++; - ins->declaration.indexable_temp.register_size = *tokens++; - ins->declaration.indexable_temp.component_count = *tokens; -} - -static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; -} - -static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; - src_params[0].reg.u.fp_body_idx = *tokens++; - shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); -} - -static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, 
const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.index = *tokens; -} - -static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.index = *tokens++; - FIXME("Ignoring set of function bodies (count %u).\n", *tokens); -} - -static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.fp.index = *tokens++; - ins->declaration.fp.body_count = *tokens++; - ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; - ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; - FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); -} - -static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) - >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_partitioning = (opcode_token & 
VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.max_tessellation_factor = *(float *)tokens; -} - -static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.thread_group_size.x = *tokens++; - ins->declaration.thread_group_size.y = *tokens++; - ins->declaration.thread_group_size.z = *tokens++; -} - -static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct 
vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); - ins->declaration.tgsm_raw.byte_count = *tokens; - if (ins->declaration.tgsm_raw.byte_count % 4) - FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); -} - -static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.tgsm_structured.reg); - ins->declaration.tgsm_structured.byte_stride = *tokens++; - ins->declaration.tgsm_structured.structure_count = *tokens; - if (ins->declaration.tgsm_structured.byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); -} - -static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, 
unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; -} - -/* - * d -> VKD3D_DATA_DOUBLE - * f -> VKD3D_DATA_FLOAT - * i -> VKD3D_DATA_INT - * u -> VKD3D_DATA_UINT - * O -> VKD3D_DATA_OPAQUE - * R -> VKD3D_DATA_RESOURCE - * S -> VKD3D_DATA_SAMPLER - * U -> VKD3D_DATA_UAV - */ -static const struct vkd3d_sm4_opcode_info opcode_table[] = -{ - {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, - {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, - {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", 
""}, - {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, - {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, - {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, - {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, - {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, - {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, - {VKD3D_SM4_OP_DISCARD, VKD3DSIH_TEXKILL, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, - {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, - {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, - {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, - {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, - {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, - {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, - {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, - {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, - {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, - {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, - {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, - {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, - {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, - {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, - {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, - {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, - {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, - {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, - {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, - {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, - {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, - {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, - {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, - {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, - {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, - {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, - {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, - 
{VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, - {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, - {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, - {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, - {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, - {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, - {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, - {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, - {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, - {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", - shader_sm4_read_shader_data}, - {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, - {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, - {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, - {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, - {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, - {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, - {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, - {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, - {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, - {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, - {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, - {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, - {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, - {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, - {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, - {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, - {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, - {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, - {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, - {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, - {VKD3D_SM4_OP_UMUL, 
VKD3DSIH_UMUL, "uu", "uu"}, - {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, - {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, - {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, - {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, - {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, - {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", - shader_sm4_read_dcl_resource}, - {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", - shader_sm4_read_dcl_constant_buffer}, - {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", - shader_sm4_read_dcl_sampler}, - {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", - shader_sm4_read_dcl_index_range}, - {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", - shader_sm4_read_dcl_output_topology}, - {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", - shader_sm4_read_dcl_input_primitive}, - {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", - shader_sm4_read_declaration_dst}, - {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", - shader_sm4_read_dcl_input_ps}, - {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", - shader_sm4_read_dcl_input_ps_siv}, - {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", - shader_sm4_read_declaration_dst}, - {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, 
VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", - shader_sm4_read_dcl_indexable_temp}, - {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", - shader_sm4_read_dcl_global_flags}, - {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, - {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, - {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, - {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, - {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, - {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, - {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, - {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", - shader_sm5_read_fcall}, - {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, - {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, - {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, - {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, - {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, - {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, - {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, - {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, - {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, - {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, - {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, - {VKD3D_SM5_OP_SWAPC, 
VKD3DSIH_SWAPC, "ff", "uff"}, - {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, - {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", - shader_sm5_read_dcl_function_body}, - {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", - shader_sm5_read_dcl_function_table}, - {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", - shader_sm5_read_dcl_interface}, - {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", - shader_sm5_read_control_point_count}, - {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", - shader_sm5_read_control_point_count}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", - shader_sm5_read_dcl_tessellator_domain}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", - shader_sm5_read_dcl_tessellator_partitioning}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", - shader_sm5_read_dcl_tessellator_output_primitive}, - {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", - shader_sm5_read_dcl_hs_max_tessfactor}, - {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", - shader_sm5_read_dcl_thread_group}, - {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", - shader_sm4_read_dcl_resource}, - {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", - shader_sm5_read_dcl_uav_raw}, - {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", - shader_sm5_read_dcl_uav_structured}, - {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", - shader_sm5_read_dcl_tgsm_raw}, - 
{VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", - shader_sm5_read_dcl_tgsm_structured}, - {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", - shader_sm5_read_dcl_resource_raw}, - {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", - shader_sm5_read_dcl_resource_structured}, - {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, - {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, - {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, - {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, - {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, - {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, - {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, - {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, 
VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, - {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", - shader_sm5_read_sync}, - {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, - {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, - {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, - {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, - {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, - {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, - {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, - {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, - {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, - {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, - {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, - {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, - {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, - {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, - {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, - {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, - {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, - {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, - {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, - {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, - {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, - {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, - {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, - {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, - {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, - {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, - {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, - 
{VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, - {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, - {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, - {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, - {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, -}; - -static const enum vkd3d_shader_register_type register_type_table[] = -{ - /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, - /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, - /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, - /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, - /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, - /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, - /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, - /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, - /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, - /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, - /* UNKNOWN */ ~0u, - /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, - /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, - /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, - /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, - /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, - /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, - /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, - /* UNKNOWN */ ~0u, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, - /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, - /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, - /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, - /* 
VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, - /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, - /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, - /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, - /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, - /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, - /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, - /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, - /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, - /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, - /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, - /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, -}; - -static const enum vkd3d_shader_register_precision register_precision_table[] = -{ - /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, - /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, -}; - -static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) -{ - unsigned int i; - - for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) - { - if (opcode == opcode_table[i].opcode) return &opcode_table[i]; - } - - return NULL; -} - -static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) -{ - switch (sm4->p.shader_version.type) - { - case VKD3D_SHADER_TYPE_PIXEL: - if (reg->type == VKD3DSPR_OUTPUT) - { - unsigned int reg_idx = reg->idx[0].offset; - - if (reg_idx >= 
ARRAY_SIZE(sm4->output_map)) - { - ERR("Invalid output index %u.\n", reg_idx); - break; - } - - reg->type = VKD3DSPR_COLOROUT; - reg->idx[0].offset = sm4->output_map[reg_idx]; - } - break; - - default: - break; - } -} - -static enum vkd3d_data_type map_data_type(char t) -{ - switch (t) - { - case 'd': - return VKD3D_DATA_DOUBLE; - case 'f': - return VKD3D_DATA_FLOAT; - case 'i': - return VKD3D_DATA_INT; - case 'u': - return VKD3D_DATA_UINT; - case 'O': - return VKD3D_DATA_OPAQUE; - case 'R': - return VKD3D_DATA_RESOURCE; - case 'S': - return VKD3D_DATA_SAMPLER; - case 'U': - return VKD3D_DATA_UAV; - default: - ERR("Invalid data type '%c'.\n", t); - return VKD3D_DATA_FLOAT; - } -} - -static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - shader_instruction_array_destroy(&parser->instructions); - free_shader_desc(&parser->shader_desc); - vkd3d_free(sm4); -} - -static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) -{ - if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) - { - struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); - - if (!(reg_idx->rel_addr = rel_addr)) - { - ERR("Failed to get src param for relative addressing.\n"); - return false; - } - - if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) - reg_idx->offset = *(*ptr)++; - else - reg_idx->offset = 0; - shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); - } - else - { - reg_idx->rel_addr = NULL; - reg_idx->offset = *(*ptr)++; - } - - return true; -} - -static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) -{ - switch (register_type) - { - case VKD3D_SM4_RT_SAMPLER: - case VKD3D_SM4_RT_RESOURCE: - case VKD3D_SM4_RT_CONSTBUFFER: - case VKD3D_SM5_RT_UAV: - return true; - - default: - return false; - } -} - -static bool 
shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, - enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) -{ - enum vkd3d_sm4_register_precision precision; - enum vkd3d_sm4_register_type register_type; - enum vkd3d_sm4_extended_operand_type type; - enum vkd3d_sm4_register_modifier m; - uint32_t token, order, extended; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = *(*ptr)++; - - register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; - if (register_type >= ARRAY_SIZE(register_type_table) - || register_type_table[register_type] == VKD3DSPR_INVALID) - { - FIXME("Unhandled register type %#x.\n", register_type); - param->type = VKD3DSPR_TEMP; - } - else - { - param->type = register_type_table[register_type]; - } - param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - param->non_uniform = false; - param->data_type = data_type; - - *modifier = VKD3DSPSM_NONE; - if (token & VKD3D_SM4_EXTENDED_OPERAND) - { - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - extended = *(*ptr)++; - - if (extended & VKD3D_SM4_EXTENDED_OPERAND) - { - FIXME("Skipping second-order extended operand.\n"); - *ptr += *ptr < end; - } - - type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; - if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) - { - m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; - switch (m) - { - case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: - *modifier = VKD3DSPSM_NEG; - break; - - case VKD3D_SM4_REGISTER_MODIFIER_ABS: - *modifier = VKD3DSPSM_ABS; - break; - - case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: - *modifier = VKD3DSPSM_ABSNEG; - break; - - default: - FIXME("Unhandled register modifier %#x.\n", m); - /* fall-through */ - case VKD3D_SM4_REGISTER_MODIFIER_NONE: - break; - } - - precision = 
(extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; - if (precision >= ARRAY_SIZE(register_precision_table) - || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) - { - FIXME("Unhandled register precision %#x.\n", precision); - param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; - } - else - { - param->precision = register_precision_table[precision]; - } - - if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) - param->non_uniform = true; - - extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK - | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK - | VKD3D_SM4_EXTENDED_OPERAND); - if (extended) - FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); - } - else if (type) - { - FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); - } - } - - order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; - - if (order < 1) - { - param->idx[0].offset = ~0u; - param->idx[0].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[0]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order < 2) - { - param->idx[1].offset = ~0u; - param->idx[1].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[1]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order < 3) - { - param->idx[2].offset = ~0u; - param->idx[2].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[2]))) - { - ERR("Failed to read register index.\n"); - 
return false; - } - } - - if (order > 3) - { - WARN("Unhandled order %u.\n", order); - return false; - } - - if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) - { - enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; - unsigned int dword_count; - - switch (dimension) - { - case VKD3D_SM4_DIMENSION_SCALAR: - param->immconst_type = VKD3D_IMMCONST_SCALAR; - dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); - if (end - *ptr < dword_count) - { - WARN("Invalid ptr %p, end %p.\n", *ptr, end); - return false; - } - memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); - *ptr += dword_count; - break; - - case VKD3D_SM4_DIMENSION_VEC4: - param->immconst_type = VKD3D_IMMCONST_VEC4; - if (end - *ptr < VKD3D_VEC4_SIZE) - { - WARN("Invalid ptr %p, end %p.\n", *ptr, end); - return false; - } - memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); - *ptr += 4; - break; - - default: - FIXME("Unhandled dimension %#x.\n", dimension); - break; - } - } - else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) - { - /* SM5.1 places a symbol identifier in idx[0] and moves - * other values up one slot. Normalize to SM5.1. 
*/ - param->idx[2] = param->idx[1]; - param->idx[1] = param->idx[0]; - } - - map_register(priv, param); - - return true; -} - -static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) -{ - switch (reg->type) - { - case VKD3DSPR_COVERAGE: - case VKD3DSPR_DEPTHOUT: - case VKD3DSPR_DEPTHOUTGE: - case VKD3DSPR_DEPTHOUTLE: - case VKD3DSPR_GSINSTID: - case VKD3DSPR_LOCALTHREADINDEX: - case VKD3DSPR_OUTPOINTID: - case VKD3DSPR_PRIMID: - case VKD3DSPR_SAMPLEMASK: - case VKD3DSPR_OUTSTENCILREF: - return true; - default: - return false; - } -} - -static uint32_t swizzle_from_sm4(uint32_t s) -{ - return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); -} - -static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) +void dxbc_writer_init(struct dxbc_writer *dxbc) { - DWORD token; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = **ptr; - - if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) - { - ERR("Failed to read parameter.\n"); - return false; - } - - if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) - { - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - } - else - { - enum vkd3d_sm4_swizzle_type swizzle_type = - (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - - switch (swizzle_type) - { - case VKD3D_SM4_SWIZZLE_NONE: - if (shader_sm4_is_scalar_register(&src_param->reg)) - src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - else - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - break; - - case VKD3D_SM4_SWIZZLE_SCALAR: - src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; - src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; - break; - - case VKD3D_SM4_SWIZZLE_VEC4: - 
src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); - break; - - default: - FIXME("Unhandled swizzle type %#x.\n", swizzle_type); - break; - } - } - - return true; + memset(dxbc, 0, sizeof(*dxbc)); }
-static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) +void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) { - enum vkd3d_shader_src_modifier modifier; - DWORD token; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = **ptr; - - if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) - { - ERR("Failed to read parameter.\n"); - return false; - } - - if (modifier != VKD3DSPSM_NONE) - { - ERR("Invalid source modifier %#x on destination register.\n", modifier); - return false; - } + struct vkd3d_shader_dxbc_section_desc *section;
- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; - if (data_type == VKD3D_DATA_DOUBLE) - dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); - /* Scalar registers are declared with no write mask in shader bytecode. */ - if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) - dst_param->write_mask = VKD3DSP_WRITEMASK_0; - dst_param->modifiers = 0; - dst_param->shift = 0; + assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections));
- return true; + section = &dxbc->sections[dxbc->section_count++]; + section->tag = tag; + section->data.code = data; + section->data.size = size; }
-static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) +int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, + struct vkd3d_shader_code *dxbc, char **messages) { - enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; - - switch (modifier_type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: - { - static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER - | VKD3D_SM4_MODIFIER_MASK - | VKD3D_SM4_AOFFIMMI_U_MASK - | VKD3D_SM4_AOFFIMMI_V_MASK - | VKD3D_SM4_AOFFIMMI_W_MASK; - - /* Bit fields are used for sign extension. */ - struct - { - int u : 4; - int v : 4; - int w : 4; - } aoffimmi; - - if (modifier & ~recognized_bits) - FIXME("Unhandled instruction modifier %#x.\n", modifier); - - aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; - aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; - aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; - ins->texel_offset.u = aoffimmi.u; - ins->texel_offset.v = aoffimmi.v; - ins->texel_offset.w = aoffimmi.w; - break; - } - - case VKD3D_SM5_MODIFIER_DATA_TYPE: - { - DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; - unsigned int i; - - for (i = 0; i < VKD3D_VEC4_SIZE; i++) - { - enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); - - if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) - { - FIXME("Unhandled data type %#x.\n", data_type); - ins->resource_data_type[i] = VKD3D_DATA_FLOAT; - } - else - { - ins->resource_data_type[i] = data_type_table[data_type]; - } - } - break; - } - - case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: - { - enum vkd3d_sm4_resource_type resource_type - = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; - - if (resource_type == 
VKD3D_SM4_RESOURCE_RAW_BUFFER) - ins->raw = true; - else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) - ins->structured = true; - - if (resource_type < ARRAY_SIZE(resource_type_table)) - ins->resource_type = resource_type_table[resource_type]; - else - { - FIXME("Unhandled resource type %#x.\n", resource_type); - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; - } - - ins->resource_stride - = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; - break; - } + size_t size_position, offsets_position, checksum_position, i; + struct vkd3d_bytecode_buffer buffer = {0}; + uint32_t checksum[4];
- default: - FIXME("Unhandled instruction modifier %#x.\n", modifier); - } -} + TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages);
-static void shader_sm4_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - const struct vkd3d_sm4_opcode_info *opcode_info; - uint32_t opcode_token, opcode, previous_token; - struct vkd3d_shader_dst_param *dst_params; - struct vkd3d_shader_src_param *src_params; - const uint32_t **ptr = &parser->ptr; - unsigned int i, len; - size_t remaining; - const uint32_t *p; - DWORD precise; + if (messages) + *messages = NULL;
- if (*ptr >= sm4->end) - { - WARN("End of byte-code, failed to read opcode.\n"); - goto fail; - } - remaining = sm4->end - *ptr; + put_u32(&buffer, TAG_DXBC);
- ++parser->location.line; + checksum_position = bytecode_get_size(&buffer); + for (i = 0; i < 4; ++i) + put_u32(&buffer, 0);
- opcode_token = *(*ptr)++; - opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; + put_u32(&buffer, 1); /* version */ + size_position = put_u32(&buffer, 0); + put_u32(&buffer, section_count);
- len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); - if (!len) - { - if (remaining < 2) - { - WARN("End of byte-code, failed to read length token.\n"); - goto fail; - } - len = **ptr; - } - if (!len || remaining < len) - { - WARN("Read invalid length %u (remaining %zu).\n", len, remaining); - goto fail; - } - --len; + offsets_position = bytecode_get_size(&buffer); + for (i = 0; i < section_count; ++i) + put_u32(&buffer, 0);
- if (!(opcode_info = get_opcode_info(opcode))) + for (i = 0; i < section_count; ++i) { - FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); - ins->handler_idx = VKD3DSIH_INVALID; - *ptr += len; - return; - } - - ins->handler_idx = opcode_info->handler_idx; - ins->flags = 0; - ins->coissue = false; - ins->raw = false; - ins->structured = false; - ins->predicate = NULL; - ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); - ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); - ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); - if (!src_params && ins->src_count) - { - ERR("Failed to allocate src parameters.\n"); - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; + set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_align(&buffer)); + put_u32(&buffer, sections[i].tag); + put_u32(&buffer, sections[i].data.size); + bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); } - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; - ins->resource_stride = 0; - ins->resource_data_type[0] = VKD3D_DATA_FLOAT; - ins->resource_data_type[1] = VKD3D_DATA_FLOAT; - ins->resource_data_type[2] = VKD3D_DATA_FLOAT; - ins->resource_data_type[3] = VKD3D_DATA_FLOAT; - memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + set_u32(&buffer, size_position, bytecode_get_size(&buffer));
- p = *ptr; - *ptr += len; + vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); + for (i = 0; i < 4; ++i) + set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]);
- if (opcode_info->read_opcode_func) - { - ins->dst = NULL; - ins->dst_count = 0; - opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); - } - else + if (!buffer.status) { - enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; - - previous_token = opcode_token; - while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) - shader_sm4_read_instruction_modifier(previous_token = *p++, ins); - - ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) - { - ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; - instruction_dst_modifier = VKD3DSPDM_SATURATE; - } - precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; - ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; - - ins->dst = dst_params = shader_parser_get_dst_params(parser, ins->dst_count); - if (!dst_params && ins->dst_count) - { - ERR("Failed to allocate dst parameters.\n"); - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - for (i = 0; i < ins->dst_count; ++i) - { - if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), - &dst_params[i]))) - { - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - dst_params[i].modifiers |= instruction_dst_modifier; - } - - for (i = 0; i < ins->src_count; ++i) - { - if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), - &src_params[i]))) - { - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - } + dxbc->code = buffer.data; + dxbc->size = buffer.size; } - - return; - -fail: - *ptr = sm4->end; - ins->handler_idx = VKD3DSIH_INVALID; - return; -} - -static bool shader_sm4_is_end(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - return parser->ptr == sm4->end; + return buffer.status; }
-static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = -{ - .parser_destroy = shader_sm4_destroy, -}; - -static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, - size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, - struct vkd3d_shader_message_context *message_context) +int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) { - struct vkd3d_shader_version version; - uint32_t version_token, token_count; - unsigned int i; - - if (byte_code_size / sizeof(*byte_code) < 2) - { - WARN("Invalid byte code size %lu.\n", (long)byte_code_size); - return false; - } - - version_token = byte_code[0]; - TRACE("Version: 0x%08x.\n", version_token); - token_count = byte_code[1]; - TRACE("Token count: %u.\n", token_count); - - if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) - { - WARN("Invalid token count %u.\n", token_count); - return false; - } - - sm4->start = &byte_code[2]; - sm4->end = &byte_code[token_count]; - - switch (version_token >> 16) - { - case VKD3D_SM4_PS: - version.type = VKD3D_SHADER_TYPE_PIXEL; - break; - - case VKD3D_SM4_VS: - version.type = VKD3D_SHADER_TYPE_VERTEX; - break; - - case VKD3D_SM4_GS: - version.type = VKD3D_SHADER_TYPE_GEOMETRY; - break; - - case VKD3D_SM5_HS: - version.type = VKD3D_SHADER_TYPE_HULL; - break; - - case VKD3D_SM5_DS: - version.type = VKD3D_SHADER_TYPE_DOMAIN; - break; - - case VKD3D_SM5_CS: - version.type = VKD3D_SHADER_TYPE_COMPUTE; - break; - - default: - FIXME("Unrecognised shader type %#x.\n", version_token >> 16); - } - version.major = VKD3D_SM4_VERSION_MAJOR(version_token); - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, - token_count / 7u + 20)) - return false; - sm4->p.ptr = sm4->start; - - memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); - for (i = 0; i < output_signature->element_count; ++i) - { - struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; - - if (version.type == VKD3D_SHADER_TYPE_PIXEL - && ascii_strcasecmp(e->semantic_name, "SV_Target")) - continue; - if (e->register_index >= ARRAY_SIZE(sm4->output_map)) - { - WARN("Invalid output index %u.\n", e->register_index); - continue; - } - - sm4->output_map[e->register_index] = e->semantic_index; - } - - return true; + return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); }
static bool require_space(size_t offset, size_t count, size_t size, size_t data_size) @@ -1928,12 +322,12 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, }
static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *section, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *s) + struct vkd3d_shader_message_context *message_context, struct shader_signature *s) { bool has_stream_index, has_min_precision; - struct vkd3d_shader_signature_element *e; const char *data = section->data.code; uint32_t count, header_size; + struct signature_element *e; const char *ptr = data; unsigned int i;
@@ -1979,6 +373,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s { uint32_t name_offset, mask;
+ e[i].sort_index = i; + if (has_stream_index) read_dword(&ptr, &e[i].stream_index); else @@ -1995,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s read_dword(&ptr, &e[i].sysval_semantic); read_dword(&ptr, &e[i].component_type); read_dword(&ptr, &e[i].register_index); + e[i].register_count = 1; read_dword(&ptr, &mask); e[i].mask = mask & 0xff; e[i].used_mask = (mask >> 8) & 0xff; @@ -2029,7 +426,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, struct vkd3d_shader_message_context *message_context, void *ctx) { - struct vkd3d_shader_signature *is = ctx; + struct shader_signature *is = ctx;
if (section->tag != TAG_ISGN) return VKD3D_OK; @@ -2037,13 +434,13 @@ static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, if (is->elements) { FIXME("Multiple input signatures.\n"); - vkd3d_shader_free_shader_signature(is); + shader_signature_cleanup(is); } return shader_parse_signature(section, message_context, is); }
int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature) + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature) { int ret;
@@ -2122,12 +519,12 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section,
void free_shader_desc(struct vkd3d_shader_desc *desc) { - vkd3d_shader_free_shader_signature(&desc->input_signature); - vkd3d_shader_free_shader_signature(&desc->output_signature); - vkd3d_shader_free_shader_signature(&desc->patch_constant_signature); + shader_signature_cleanup(&desc->input_signature); + shader_signature_cleanup(&desc->output_signature); + shader_signature_cleanup(&desc->patch_constant_signature); }
-static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) { int ret; @@ -2151,66 +548,6 @@ static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, return ret; }
-int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -{ - struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_desc *shader_desc; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm4_parser *sm4; - int ret; - - if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) - { - ERR("Failed to allocate parser.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - shader_desc = &sm4->p.shader_desc; - if ((ret = shader_extract_from_dxbc(&compile_info->source, - message_context, compile_info->source_name, shader_desc)) < 0) - { - WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm4); - return ret; - } - - if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, &shader_desc->output_signature, message_context)) - { - WARN("Failed to initialise shader parser.\n"); - free_shader_desc(shader_desc); - vkd3d_free(sm4); - return VKD3D_ERROR_INVALID_ARGUMENT; - } - - instructions = &sm4->p.instructions; - while (!shader_sm4_is_end(&sm4->p)) - { - if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { - ERR("Failed to allocate instructions.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - shader_sm4_destroy(&sm4->p); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ins = &instructions->elements[instructions->count]; - shader_sm4_read_instruction(&sm4->p, ins); - - if (ins->handler_idx == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - shader_sm4_destroy(&sm4->p); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ++instructions->count; - } - - *parser = &sm4->p; - - return VKD3D_OK; -} - /* root signatures */ #define VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE
@@ -2862,7 +1199,7 @@ static int shader_write_root_parameters(struct root_signature_writer_context *co size_t parameters_position; unsigned int i;
- parameters_position = bytecode_get_size(buffer); + parameters_position = bytecode_align(buffer); for (i = 0; i < parameter_count; ++i) { put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 64d6e87065b..ba5bcfbfaf0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name)
void hlsl_free_var(struct hlsl_ir_var *decl) { + unsigned int k; + vkd3d_free((void *)decl->name); hlsl_cleanup_semantic(&decl->semantic); + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); vkd3d_free(decl); }
@@ -126,7 +130,7 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type)
unsigned int hlsl_type_minor_size(const struct hlsl_type *type) { - if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) + if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) return type->dimx; else return type->dimy; @@ -134,7 +138,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type)
unsigned int hlsl_type_major_size(const struct hlsl_type *type) { - if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) + if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) return type->dimy; else return type->dimx; @@ -142,7 +146,7 @@ unsigned int hlsl_type_major_size(const struct hlsl_type *type)
unsigned int hlsl_type_element_count(const struct hlsl_type *type) { - switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: return type->dimx; @@ -157,16 +161,26 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) } }
-static unsigned int get_array_size(const struct hlsl_type *type) +const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_get_multiarray_element_type(type->e.array.type); + return type; +} + +unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) { - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_get_multiarray_size(type->e.array.type) * type->e.array.elements_count; return 1; }
bool hlsl_type_is_resource(const struct hlsl_type *type) { - if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_is_resource(type->e.array.type); + + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) { @@ -183,10 +197,13 @@ bool hlsl_type_is_resource(const struct hlsl_type *type)
enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) { - if (type->type <= HLSL_CLASS_LAST_NUMERIC) + if (type->class <= HLSL_CLASS_LAST_NUMERIC) return HLSL_REGSET_NUMERIC;
- if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_get_regset(type->e.array.type); + + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) { @@ -203,8 +220,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) vkd3d_unreachable(); } } - else if (type->type == HLSL_CLASS_ARRAY) - return hlsl_type_get_regset(type->e.array.type);
vkd3d_unreachable(); } @@ -216,7 +231,8 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int * (b) the type would cross a vec4 boundary; i.e. a vec3 and a * vec1 can be packed together, but not a vec3 and a vec2. */ - if (type->type > HLSL_CLASS_LAST_NUMERIC || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) + if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY + || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) return align(offset, 4); return offset; } @@ -229,7 +245,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type for (k = 0; k <= HLSL_REGSET_LAST; ++k) type->reg_size[k] = 0;
- switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -278,7 +294,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type type->reg_size[k] += field->type->reg_size[k]; }
- type->dimx += field->type->dimx * field->type->dimy * get_array_size(field->type); + type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type); } break; } @@ -317,7 +333,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e vkd3d_free(type); return NULL; } - type->type = type_class; + type->class = type_class; type->base_type = base_type; type->dimx = dimx; type->dimy = dimy; @@ -330,7 +346,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e
static bool type_is_single_component(const struct hlsl_type *type) { - return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_OBJECT; + return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; }
/* Given a type and a component index, this function moves one step through the path required to @@ -349,7 +365,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, assert(!type_is_single_component(type)); assert(index < hlsl_type_component_count(type));
- switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: assert(index < type->dimx); @@ -427,7 +443,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl return true; }
- if (!(deref->path = hlsl_alloc(ctx, sizeof(*deref->path) * deref->path_len))) + if (!(deref->path = hlsl_calloc(ctx, deref->path_len, sizeof(*deref->path)))) { deref->var = NULL; deref->path_len = 0; @@ -437,6 +453,71 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl return true; }
+bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain) +{ + struct hlsl_ir_index *index; + struct hlsl_ir_load *load; + unsigned int chain_len, i; + struct hlsl_ir_node *ptr; + + deref->path = NULL; + deref->path_len = 0; + deref->offset.node = NULL; + + assert(chain); + if (chain->type == HLSL_IR_INDEX) + assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); + + /* Find the length of the index chain */ + chain_len = 0; + ptr = chain; + while (ptr->type == HLSL_IR_INDEX) + { + index = hlsl_ir_index(ptr); + + chain_len++; + ptr = index->val.node; + } + + if (ptr->type != HLSL_IR_LOAD) + { + hlsl_error(ctx, &chain->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid l-value."); + return false; + } + load = hlsl_ir_load(ptr); + + if (!init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) + return false; + + for (i = 0; i < load->src.path_len; ++i) + hlsl_src_from_node(&deref->path[i], load->src.path[i].node); + + chain_len = 0; + ptr = chain; + while (ptr->type == HLSL_IR_INDEX) + { + unsigned int p = deref->path_len - 1 - chain_len; + + index = hlsl_ir_index(ptr); + if (hlsl_index_is_noncontiguous(index)) + { + hlsl_src_from_node(&deref->path[p], deref->path[p + 1].node); + hlsl_src_remove(&deref->path[p + 1]); + hlsl_src_from_node(&deref->path[p + 1], index->idx.node); + } + else + { + hlsl_src_from_node(&deref->path[p], index->idx.node); + } + + chain_len++; + ptr = index->val.node; + } + assert(deref->path_len == load->src.path_len + chain_len); + + return true; +} + struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) { struct hlsl_type *type; @@ -459,9 +540,9 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl { unsigned int path_len, path_index, deref_path_len, i; struct hlsl_type *path_type; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
- list_init(&block->instrs); + hlsl_block_init(block);
path_len = 0; path_type = hlsl_deref_get_type(ctx, prefix); @@ -487,12 +568,12 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl
if (!(c = hlsl_new_uint_constant(ctx, next_index, loc))) { - hlsl_free_instr_list(&block->instrs); + hlsl_block_cleanup(block); return false; } - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c);
- hlsl_src_from_node(&deref->path[deref_path_len++], &c->node); + hlsl_src_from_node(&deref->path[deref_path_len++], c); }
assert(deref_path_len == deref->path_len); @@ -505,7 +586,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { assert(idx);
- switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: return hlsl_get_scalar_type(ctx, type->base_type); @@ -523,8 +604,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { struct hlsl_ir_constant *c = hlsl_ir_constant(idx);
- assert(c->value[0].u < type->e.record.field_count); - return type->e.record.fields[c->value[0].u].type; + assert(c->value.u[0].u < type->e.record.field_count); + return type->e.record.fields[c->value.u[0].u].type; }
default: @@ -539,7 +620,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL;
- type->type = HLSL_CLASS_ARRAY; + type->class = HLSL_CLASS_ARRAY; type->modifiers = basic_type->modifiers; type->e.array.elements_count = array_size; type->e.array.type = basic_type; @@ -559,7 +640,7 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name,
if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_STRUCT; + type->class = HLSL_CLASS_STRUCT; type->base_type = HLSL_TYPE_VOID; type->name = name; type->dimy = 1; @@ -579,7 +660,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_
if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_OBJECT; + type->class = HLSL_CLASS_OBJECT; type->base_type = HLSL_TYPE_TEXTURE; type->dimx = 4; type->dimy = 1; @@ -597,7 +678,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim
if (!(type = vkd3d_calloc(1, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_OBJECT; + type->class = HLSL_CLASS_OBJECT; type->base_type = HLSL_TYPE_UAV; type->dimx = format->dimx; type->dimy = 1; @@ -614,6 +695,8 @@ static const char * get_case_insensitive_typename(const char *name) { "dword", "float", + "matrix", + "vector", }; unsigned int i;
@@ -677,7 +760,7 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha
unsigned int hlsl_type_component_count(const struct hlsl_type *type) { - switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -709,7 +792,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 if (t1 == t2) return true;
- if (t1->type != t2->type) + if (t1->class != t2->class) return false; if (t1->base_type != t2->base_type) return false; @@ -729,7 +812,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 return false; if (t1->dimy != t2->dimy) return false; - if (t1->type == HLSL_CLASS_STRUCT) + if (t1->class == HLSL_CLASS_STRUCT) { size_t i;
@@ -748,7 +831,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 return false; } } - if (t1->type == HLSL_CLASS_ARRAY) + if (t1->class == HLSL_CLASS_ARRAY) return t1->e.array.elements_count == t2->e.array.elements_count && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type);
@@ -772,7 +855,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, return NULL; } } - type->type = old->type; + type->class = old->class; type->base_type = old->base_type; type->dimx = old->dimx; type->dimy = old->dimy; @@ -781,7 +864,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, type->modifiers |= default_majority; type->sampler_dim = old->sampler_dim; type->is_minimum_precision = old->is_minimum_precision; - switch (old->type) + switch (old->class) { case HLSL_CLASS_ARRAY: if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) @@ -799,7 +882,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old,
type->e.record.field_count = field_count;
- if (!(type->e.record.fields = hlsl_alloc(ctx, field_count * sizeof(*type->e.record.fields)))) + if (!(type->e.record.fields = hlsl_calloc(ctx, field_count, sizeof(*type->e.record.fields)))) { vkd3d_free((void *)type->name); vkd3d_free(type); @@ -848,40 +931,58 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) return true; }
-struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, +struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *cast;
- cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, *loc); + cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, loc); if (cast) cast->data_type = type; - return hlsl_ir_expr(cast); + return cast; }
-struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) +struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) { /* Use a cast to the same type as a makeshift identity expression. */ return hlsl_new_cast(ctx, node, node->data_type, &node->loc); }
struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, - const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, const struct hlsl_reg_reservation *reg_reservation) { struct hlsl_ir_var *var; + unsigned int k;
if (!(var = hlsl_alloc(ctx, sizeof(*var)))) return NULL;
var->name = name; var->data_type = type; - var->loc = loc; + var->loc = *loc; if (semantic) var->semantic = *semantic; var->storage_modifiers = modifiers; if (reg_reservation) var->reg_reservation = *reg_reservation; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + unsigned int i, obj_count = type->reg_size[k]; + + if (obj_count == 0) + continue; + + if (!(var->objects_usage[k] = hlsl_calloc(ctx, obj_count, sizeof(*var->objects_usage[0])))) + { + for (i = 0; i < k; ++i) + vkd3d_free(var->objects_usage[i]); + vkd3d_free(var); + return NULL; + } + } + return var; }
@@ -901,7 +1002,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem hlsl_release_string_buffer(ctx, string); return NULL; } - var = hlsl_new_var(ctx, name, type, *loc, NULL, 0, NULL); + var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); hlsl_release_string_buffer(ctx, string); if (var) list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); @@ -910,7 +1011,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem
static bool type_is_single_reg(const struct hlsl_type *type) { - return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; + return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_VECTOR; }
bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other) @@ -964,7 +1065,7 @@ static void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type type, list_init(&node->uses); }
-struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) +struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) { struct hlsl_deref lhs_deref;
@@ -972,7 +1073,7 @@ struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir return hlsl_new_store_index(ctx, &lhs_deref, NULL, rhs, 0, &rhs->loc); }
-struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, +struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc) { struct hlsl_ir_store *store; @@ -1001,35 +1102,35 @@ struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hl writemask = (1 << rhs->data_type->dimx) - 1; store->writemask = writemask;
- return store; + return &store->node; }
-struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) { struct hlsl_block comp_path_block; struct hlsl_ir_store *store;
- list_init(&block->instrs); + hlsl_block_init(block);
if (!(store = hlsl_alloc(ctx, sizeof(*store)))) - return NULL; + return false; init_node(&store->node, HLSL_IR_STORE, NULL, &rhs->loc);
if (!init_deref_from_component_index(ctx, &comp_path_block, &store->lhs, lhs, comp, &rhs->loc)) { vkd3d_free(store); - return NULL; + return false; } - list_move_tail(&block->instrs, &comp_path_block.instrs); + hlsl_block_add_block(block, &comp_path_block); hlsl_src_from_node(&store->rhs, rhs);
if (type_is_single_reg(rhs->data_type)) store->writemask = (1 << rhs->data_type->dimx) - 1;
- list_add_tail(&block->instrs, &store->node.entry); + hlsl_block_add_instr(block, &store->node);
- return store; + return true; }
struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, @@ -1045,66 +1146,54 @@ struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function return &call->node; }
-struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, - const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, + const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc) { struct hlsl_ir_constant *c;
- assert(type->type <= HLSL_CLASS_VECTOR); + assert(type->class <= HLSL_CLASS_VECTOR);
if (!(c = hlsl_alloc(ctx, sizeof(*c)))) return NULL;
init_node(&c->node, HLSL_IR_CONSTANT, type, loc); + c->value = *value;
- return c; + return &c->node; }
-struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), loc))) - c->value[0].u = b ? ~0u : 0; + struct hlsl_constant_value value;
- return c; + value.u[0].u = b ? ~0u : 0; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &value, loc); }
-struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, +struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; + struct hlsl_constant_value value;
- if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - c->value[0].f = f; - - return c; + value.u[0].f = f; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), &value, loc); }
-struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, - const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); - - if (c) - c->value[0].i = n; + struct hlsl_constant_value value;
- return c; + value.u[0].i = n; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &value, loc); }
-struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, +struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + struct hlsl_constant_value value;
- if (c) - c->value[0].u = n; - - return c; + value.u[0].u = n; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); }
struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, @@ -1124,11 +1213,11 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op }
struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, - struct hlsl_ir_node *arg, struct vkd3d_shader_location loc) + struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg};
- return hlsl_new_expr(ctx, op, operands, arg->data_type, &loc); + return hlsl_new_expr(ctx, op, operands, arg->data_type, loc); }
struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, @@ -1140,17 +1229,21 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); }
-struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { struct hlsl_ir_if *iff;
if (!(iff = hlsl_alloc(ctx, sizeof(*iff)))) return NULL; - init_node(&iff->node, HLSL_IR_IF, NULL, &loc); + init_node(&iff->node, HLSL_IR_IF, NULL, loc); hlsl_src_from_node(&iff->condition, condition); - list_init(&iff->then_instrs.instrs); - list_init(&iff->else_instrs.instrs); - return iff; + hlsl_block_init(&iff->then_block); + hlsl_block_add_block(&iff->then_block, then_block); + hlsl_block_init(&iff->else_block); + if (else_block) + hlsl_block_add_block(&iff->else_block, else_block); + return &iff->node; }
struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, @@ -1183,23 +1276,36 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl return load; }
+struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc) +{ + /* This deref can only exist temporarily because it is not the real owner of its members. */ + struct hlsl_deref tmp_deref; + + assert(deref->path_len >= 1); + + tmp_deref = *deref; + tmp_deref.path_len = deref->path_len - 1; + return hlsl_new_load_index(ctx, &tmp_deref, NULL, loc); +} + struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct vkd3d_shader_location loc) + const struct vkd3d_shader_location *loc) { struct hlsl_deref var_deref;
hlsl_init_simple_deref_from_var(&var_deref, var); - return hlsl_new_load_index(ctx, &var_deref, NULL, &loc); + return hlsl_new_load_index(ctx, &var_deref, NULL, loc); }
-struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc) { struct hlsl_type *type, *comp_type; struct hlsl_block comp_path_block; struct hlsl_ir_load *load;
- list_init(&block->instrs); + hlsl_block_init(block);
if (!(load = hlsl_alloc(ctx, sizeof(*load)))) return NULL; @@ -1213,14 +1319,14 @@ struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b vkd3d_free(load); return NULL; } - list_move_tail(&block->instrs, &comp_path_block.instrs); + hlsl_block_add_block(block, &comp_path_block);
- list_add_tail(&block->instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node);
- return load; + return &load->node; }
-struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, +struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_load *load; @@ -1229,24 +1335,37 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, return NULL; init_node(&load->node, HLSL_IR_RESOURCE_LOAD, params->format, loc); load->load_type = params->type; - if (!hlsl_copy_deref(ctx, &load->resource, &params->resource)) + + if (!hlsl_init_deref_from_index_chain(ctx, &load->resource, params->resource)) { vkd3d_free(load); return NULL; } - if (!hlsl_copy_deref(ctx, &load->sampler, &params->sampler)) + + if (params->sampler) { - hlsl_cleanup_deref(&load->resource); - vkd3d_free(load); - return NULL; + if (!hlsl_init_deref_from_index_chain(ctx, &load->sampler, params->sampler)) + { + hlsl_cleanup_deref(&load->resource); + vkd3d_free(load); + return NULL; + } } + hlsl_src_from_node(&load->coords, params->coords); + hlsl_src_from_node(&load->sample_index, params->sample_index); hlsl_src_from_node(&load->texel_offset, params->texel_offset); hlsl_src_from_node(&load->lod, params->lod); - return load; + hlsl_src_from_node(&load->ddx, params->ddx); + hlsl_src_from_node(&load->ddy, params->ddy); + hlsl_src_from_node(&load->cmp, params->cmp); + load->sampling_dim = params->sampling_dim; + if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC) + load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim; + return &load->node; }
-struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, +struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_store *store; @@ -1257,10 +1376,10 @@ struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, con hlsl_copy_deref(ctx, &store->resource, resource); hlsl_src_from_node(&store->coords, coords); hlsl_src_from_node(&store->value, value); - return store; + return &store->node; }
-struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, +struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) { struct hlsl_ir_swizzle *swizzle; @@ -1275,29 +1394,66 @@ struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); hlsl_src_from_node(&swizzle->val, val); swizzle->swizzle = s; - return swizzle; + return &swizzle->node; +} + +bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) +{ + struct hlsl_type *type = index->val.node->data_type; + + return type->class == HLSL_CLASS_MATRIX && !hlsl_type_is_row_major(type); +} + +bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) +{ + return index->val.node->data_type->class == HLSL_CLASS_OBJECT; +} + +struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type = val->data_type; + struct hlsl_ir_index *index; + + if (!(index = hlsl_alloc(ctx, sizeof(*index)))) + return NULL; + + if (type->class == HLSL_CLASS_OBJECT) + type = type->e.resource_format; + else if (type->class == HLSL_CLASS_MATRIX) + type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); + else + type = hlsl_get_element_type_from_path_index(ctx, type, idx); + + init_node(&index->node, HLSL_IR_INDEX, type, loc); + hlsl_src_from_node(&index->val, val); + hlsl_src_from_node(&index->idx, idx); + return &index->node; }
-struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, + const struct vkd3d_shader_location *loc) { struct hlsl_ir_jump *jump;
if (!(jump = hlsl_alloc(ctx, sizeof(*jump)))) return NULL; - init_node(&jump->node, HLSL_IR_JUMP, NULL, &loc); + init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); jump->type = type; - return jump; + return &jump->node; }
-struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + struct hlsl_block *block, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop;
if (!(loop = hlsl_alloc(ctx, sizeof(*loop)))) return NULL; - init_node(&loop->node, HLSL_IR_LOOP, NULL, &loc); - list_init(&loop->body.instrs); - return loop; + init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); + hlsl_block_init(&loop->body); + hlsl_block_add_block(&loop->body, block); + return &loop->node; }
struct clone_instr_map @@ -1319,11 +1475,13 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_ir_node *src; struct hlsl_ir_node *dst;
+ hlsl_block_init(dst_block); + LIST_FOR_EACH_ENTRY(src, &src_block->instrs, struct hlsl_ir_node, entry) { if (!(dst = clone_instr(ctx, map, src))) { - hlsl_free_instr_list(&dst_block->instrs); + hlsl_block_cleanup(dst_block); return false; } list_add_tail(&dst_block->instrs, &dst->entry); @@ -1332,7 +1490,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, { if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs))) { - hlsl_free_instr_list(&dst_block->instrs); + hlsl_block_cleanup(dst_block); return false; }
@@ -1390,12 +1548,7 @@ static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call
static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src) { - struct hlsl_ir_constant *dst; - - if (!(dst = hlsl_new_constant(ctx, src->node.data_type, &src->node.loc))) - return NULL; - memcpy(dst->value, src->value, sizeof(src->value)); - return &dst->node; + return hlsl_new_constant(ctx, src->node.data_type, &src->value, &src->node.loc); }
static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_expr *src) @@ -1411,27 +1564,30 @@ static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_
static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src) { - struct hlsl_ir_if *dst; + struct hlsl_block then_block, else_block; + struct hlsl_ir_node *dst;
- if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), src->node.loc))) + if (!clone_block(ctx, &then_block, &src->then_block, map)) + return NULL; + if (!clone_block(ctx, &else_block, &src->else_block, map)) + { + hlsl_block_cleanup(&then_block); return NULL; + }
- if (!clone_block(ctx, &dst->then_instrs, &src->then_instrs, map) - || !clone_block(ctx, &dst->else_instrs, &src->else_instrs, map)) + if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, &else_block, &src->node.loc))) { - hlsl_free_instr(&dst->node); + hlsl_block_cleanup(&then_block); + hlsl_block_cleanup(&else_block); return NULL; } - return &dst->node; + + return dst; }
static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) { - struct hlsl_ir_jump *dst; - - if (!(dst = hlsl_new_jump(ctx, src->type, src->node.loc))) - return NULL; - return &dst->node; + return hlsl_new_jump(ctx, src->type, &src->node.loc); }
static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) @@ -1452,16 +1608,18 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_
static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { - struct hlsl_ir_loop *dst; + struct hlsl_ir_node *dst; + struct hlsl_block body;
- if (!(dst = hlsl_new_loop(ctx, src->node.loc))) + if (!clone_block(ctx, &body, &src->body, map)) return NULL; - if (!clone_block(ctx, &dst->body, &src->body, map)) + + if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) { - hlsl_free_instr(&dst->node); + hlsl_block_cleanup(&body); return NULL; } - return &dst->node; + return dst; }
static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, @@ -1486,7 +1644,12 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, } clone_src(map, &dst->coords, &src->coords); clone_src(map, &dst->lod, &src->lod); + clone_src(map, &dst->ddx, &src->ddx); + clone_src(map, &dst->ddy, &src->ddy); + clone_src(map, &dst->sample_index, &src->sample_index); + clone_src(map, &dst->cmp, &src->cmp); clone_src(map, &dst->texel_offset, &src->texel_offset); + dst->sampling_dim = src->sampling_dim; return &dst->node; }
@@ -1529,12 +1692,19 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_swizzle *src) { - struct hlsl_ir_swizzle *dst; + return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, + map_instr(map, src->val.node), &src->node.loc); +}
- if (!(dst = hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, - map_instr(map, src->val.node), &src->node.loc))) +static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, + struct hlsl_ir_index *src) +{ + struct hlsl_ir_node *dst; + + if (!(dst = hlsl_new_index(ctx, map_instr(map, src->val.node), map_instr(map, src->idx.node), + &src->node.loc))) return NULL; - return &dst->node; + return dst; }
static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, @@ -1554,6 +1724,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_IF: return clone_if(ctx, map, hlsl_ir_if(instr));
+ case HLSL_IR_INDEX: + return clone_index(ctx, map, hlsl_ir_index(instr)); + case HLSL_IR_JUMP: return clone_jump(ctx, hlsl_ir_jump(instr));
@@ -1593,13 +1766,12 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) { + struct hlsl_ir_node *constant, *store; struct hlsl_ir_function_decl *decl; - struct hlsl_ir_constant *constant; - struct hlsl_ir_store *store;
if (!(decl = hlsl_alloc(ctx, sizeof(*decl)))) return NULL; - list_init(&decl->body.instrs); + hlsl_block_init(&decl->body); decl->return_type = return_type; decl->parameters = *parameters; decl->loc = *loc; @@ -1620,17 +1792,17 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx,
if (!(constant = hlsl_new_bool_constant(ctx, false, loc))) return decl; - list_add_tail(&decl->body.instrs, &constant->node.entry); + hlsl_block_add_instr(&decl->body, constant);
- if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, &constant->node))) + if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, constant))) return decl; - list_add_tail(&decl->body.instrs, &store->node.entry); + hlsl_block_add_instr(&decl->body, store);
return decl; }
struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc) + const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) { struct hlsl_buffer *buffer;
@@ -1640,7 +1812,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type buffer->name = name; if (reservation) buffer->reservation = *reservation; - buffer->loc = loc; + buffer->loc = *loc; list_add_tail(&ctx->buffers, &buffer->entry); return buffer; } @@ -1698,10 +1870,10 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls { int r;
- if ((r = vkd3d_u32_compare(t1->type, t2->type))) + if ((r = vkd3d_u32_compare(t1->class, t2->class))) { - if (!((t1->type == HLSL_CLASS_SCALAR && t2->type == HLSL_CLASS_VECTOR) - || (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_SCALAR))) + if (!((t1->class == HLSL_CLASS_SCALAR && t2->class == HLSL_CLASS_VECTOR) + || (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_SCALAR))) return r; } if ((r = vkd3d_u32_compare(t1->base_type, t2->base_type))) @@ -1718,7 +1890,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls return r; if ((r = vkd3d_u32_compare(t1->dimy, t2->dimy))) return r; - if (t1->type == HLSL_CLASS_STRUCT) + if (t1->class == HLSL_CLASS_STRUCT) { size_t i;
@@ -1738,7 +1910,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls } return 0; } - if (t1->type == HLSL_CLASS_ARRAY) + if (t1->class == HLSL_CLASS_ARRAY) { if ((r = vkd3d_u32_compare(t1->e.array.elements_count, t2->e.array.elements_count))) return r; @@ -1768,7 +1940,7 @@ static int compare_function_decl_rb(const void *key, const struct rb_entry *entr
struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) { - struct vkd3d_string_buffer *string; + struct vkd3d_string_buffer *string, *inner_string;
static const char *const base_types[] = { @@ -1789,7 +1961,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru return string; }
- switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: assert(type->base_type < ARRAY_SIZE(base_types)); @@ -1808,10 +1980,9 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
case HLSL_CLASS_ARRAY: { - struct vkd3d_string_buffer *inner_string; const struct hlsl_type *t;
- for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) ;
if ((inner_string = hlsl_type_to_string(ctx, t))) @@ -1820,7 +1991,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru hlsl_release_string_buffer(ctx, inner_string); }
- for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) { if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) vkd3d_string_buffer_printf(string, "[]"); @@ -1860,13 +2031,26 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
assert(type->sampler_dim < ARRAY_SIZE(dimensions)); assert(type->e.resource_format->base_type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "Texture%s<%s%u>", dimensions[type->sampler_dim], - base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); + vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } return string;
case HLSL_TYPE_UAV: - vkd3d_string_buffer_printf(string, "RWTexture%s<%s%u>", dimensions[type->sampler_dim], - base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + vkd3d_string_buffer_printf(string, "RWBuffer"); + else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); + else + vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } return string;
default: @@ -1943,6 +2127,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) "HLSL_IR_CONSTANT", "HLSL_IR_EXPR", "HLSL_IR_IF", + "HLSL_IR_INDEX", "HLSL_IR_LOAD", "HLSL_IR_LOOP", "HLSL_IR_JUMP", @@ -2107,7 +2292,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl vkd3d_string_buffer_printf(buffer, "{"); for (x = 0; x < type->dimx; ++x) { - const union hlsl_constant_value *value = &constant->value[x]; + const union hlsl_constant_value_component *value = &constant->value.u[x];
switch (type->base_type) { @@ -2168,6 +2353,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_SIN] = "sin", [HLSL_OP1_SIN_REDUCED] = "sin_reduced", [HLSL_OP1_SQRT] = "sqrt", + [HLSL_OP1_TRUNC] = "trunc",
[HLSL_OP2_ADD] = "+", [HLSL_OP2_BIT_AND] = "&", @@ -2214,9 +2400,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "if ("); dump_src(buffer, &if_node->condition); vkd3d_string_buffer_printf(buffer, ") {\n"); - dump_instr_list(ctx, buffer, &if_node->then_instrs.instrs); + dump_instr_list(ctx, buffer, &if_node->then_block.instrs); vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); - dump_instr_list(ctx, buffer, &if_node->else_instrs.instrs); + dump_instr_list(ctx, buffer, &if_node->else_block.instrs); vkd3d_string_buffer_printf(buffer, " %10s }", ""); }
@@ -2255,7 +2441,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru { [HLSL_RESOURCE_LOAD] = "load_resource", [HLSL_RESOURCE_SAMPLE] = "sample", + [HLSL_RESOURCE_SAMPLE_CMP] = "sample_cmp", + [HLSL_RESOURCE_SAMPLE_CMP_LZ] = "sample_cmp_lz", [HLSL_RESOURCE_SAMPLE_LOD] = "sample_lod", + [HLSL_RESOURCE_SAMPLE_LOD_BIAS] = "sample_biased", + [HLSL_RESOURCE_SAMPLE_GRAD] = "sample_grad", [HLSL_RESOURCE_GATHER_RED] = "gather_red", [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", @@ -2269,6 +2459,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru dump_deref(buffer, &load->sampler); vkd3d_string_buffer_printf(buffer, ", coords = "); dump_src(buffer, &load->coords); + if (load->sample_index.node) + { + vkd3d_string_buffer_printf(buffer, ", sample index = "); + dump_src(buffer, &load->sample_index); + } if (load->texel_offset.node) { vkd3d_string_buffer_printf(buffer, ", offset = "); @@ -2279,6 +2474,21 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru vkd3d_string_buffer_printf(buffer, ", lod = "); dump_src(buffer, &load->lod); } + if (load->ddx.node) + { + vkd3d_string_buffer_printf(buffer, ", ddx = "); + dump_src(buffer, &load->ddx); + } + if (load->ddy.node) + { + vkd3d_string_buffer_printf(buffer, ", ddy = "); + dump_src(buffer, &load->ddy); + } + if (load->cmp.node) + { + vkd3d_string_buffer_printf(buffer, ", cmp = "); + dump_src(buffer, &load->cmp); + } vkd3d_string_buffer_printf(buffer, ")"); }
@@ -2321,6 +2531,14 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls } }
+static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_index *index) +{ + dump_src(buffer, &index->val); + vkd3d_string_buffer_printf(buffer, "[idx:"); + dump_src(buffer, &index->idx); + vkd3d_string_buffer_printf(buffer, "]"); +} + static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr) { if (instr->index) @@ -2348,6 +2566,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_if(ctx, buffer, hlsl_ir_if(instr)); break;
+ case HLSL_IR_INDEX: + dump_ir_index(buffer, hlsl_ir_index(instr)); + break; + case HLSL_IR_JUMP: dump_ir_jump(buffer, hlsl_ir_jump(instr)); break; @@ -2421,7 +2643,7 @@ void hlsl_free_type(struct hlsl_type *type) size_t i;
vkd3d_free((void *)type->name); - if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { for (i = 0; i < type->e.record.field_count; ++i) { @@ -2447,6 +2669,11 @@ void hlsl_free_instr_list(struct list *list) hlsl_free_instr(node); }
+void hlsl_block_cleanup(struct hlsl_block *block) +{ + hlsl_free_instr_list(&block->instrs); +} + static void free_ir_call(struct hlsl_ir_call *call) { vkd3d_free(call); @@ -2468,8 +2695,8 @@ static void free_ir_expr(struct hlsl_ir_expr *expr)
static void free_ir_if(struct hlsl_ir_if *if_node) { - hlsl_free_instr_list(&if_node->then_instrs.instrs); - hlsl_free_instr_list(&if_node->else_instrs.instrs); + hlsl_block_cleanup(&if_node->then_block); + hlsl_block_cleanup(&if_node->else_block); hlsl_src_remove(&if_node->condition); vkd3d_free(if_node); } @@ -2487,7 +2714,7 @@ static void free_ir_load(struct hlsl_ir_load *load)
static void free_ir_loop(struct hlsl_ir_loop *loop) { - hlsl_free_instr_list(&loop->body.instrs); + hlsl_block_cleanup(&loop->body); vkd3d_free(loop); }
@@ -2497,7 +2724,11 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) hlsl_cleanup_deref(&load->resource); hlsl_src_remove(&load->coords); hlsl_src_remove(&load->lod); + hlsl_src_remove(&load->ddx); + hlsl_src_remove(&load->ddy); + hlsl_src_remove(&load->cmp); hlsl_src_remove(&load->texel_offset); + hlsl_src_remove(&load->sample_index); vkd3d_free(load); }
@@ -2522,6 +2753,13 @@ static void free_ir_swizzle(struct hlsl_ir_swizzle *swizzle) vkd3d_free(swizzle); }
+static void free_ir_index(struct hlsl_ir_index *index) +{ + hlsl_src_remove(&index->val); + hlsl_src_remove(&index->idx); + vkd3d_free(index); +} + void hlsl_free_instr(struct hlsl_ir_node *node) { assert(list_empty(&node->uses)); @@ -2544,6 +2782,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_if(hlsl_ir_if(node)); break;
+ case HLSL_IR_INDEX: + free_ir_index(hlsl_ir_index(node)); + break; + case HLSL_IR_JUMP: free_ir_jump(hlsl_ir_jump(node)); break; @@ -2600,7 +2842,7 @@ static void free_function_decl(struct hlsl_ir_function_decl *decl) vkd3d_free((void *)decl->attrs);
vkd3d_free(decl->parameters.vars); - hlsl_free_instr_list(&decl->body.instrs); + hlsl_block_cleanup(&decl->body); vkd3d_free(decl); }
@@ -2826,11 +3068,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx)
static const char *const sampler_names[] = { - [HLSL_SAMPLER_DIM_GENERIC] = "sampler", - [HLSL_SAMPLER_DIM_1D] = "sampler1D", - [HLSL_SAMPLER_DIM_2D] = "sampler2D", - [HLSL_SAMPLER_DIM_3D] = "sampler3D", - [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", + [HLSL_SAMPLER_DIM_GENERIC] = "sampler", + [HLSL_SAMPLER_DIM_COMPARISON] = "SamplerComparisonState", + [HLSL_SAMPLER_DIM_1D] = "sampler1D", + [HLSL_SAMPLER_DIM_2D] = "sampler2D", + [HLSL_SAMPLER_DIM_3D] = "sampler3D", + [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", };
static const struct @@ -2844,8 +3087,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) { {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, {"float", HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1}, - {"VECTOR", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, - {"MATRIX", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, + {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, + {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, @@ -2993,16 +3236,16 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name,
rb_init(&ctx->functions, compare_function_rb);
- list_init(&ctx->static_initializers); + hlsl_block_init(&ctx->static_initializers); list_init(&ctx->extern_vars);
list_init(&ctx->buffers);
if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Globals"), NULL, ctx->location))) + hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) return false; if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) + hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) return false; ctx->cur_buffer = ctx->globals_buffer;
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index ccbf22a5801..cd1ad37a542 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -24,7 +24,9 @@ #include "wine/rbtree.h" #include "d3dcommon.h" #include "d3dx9shader.h" -#include "sm4.h" + +enum vkd3d_sm4_register_type; +enum vkd3d_sm4_swizzle_type;
/* The general IR structure is inspired by Mesa GLSL hir, even though the code * ends up being quite different in practice. Anyway, here comes the relevant @@ -102,18 +104,22 @@ enum hlsl_base_type
enum hlsl_sampler_dim { - HLSL_SAMPLER_DIM_GENERIC, - HLSL_SAMPLER_DIM_1D, - HLSL_SAMPLER_DIM_2D, - HLSL_SAMPLER_DIM_3D, - HLSL_SAMPLER_DIM_CUBE, - HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, - HLSL_SAMPLER_DIM_1DARRAY, - HLSL_SAMPLER_DIM_2DARRAY, - HLSL_SAMPLER_DIM_2DMS, - HLSL_SAMPLER_DIM_2DMSARRAY, - HLSL_SAMPLER_DIM_CUBEARRAY, - HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_GENERIC, + HLSL_SAMPLER_DIM_COMPARISON, + HLSL_SAMPLER_DIM_1D, + HLSL_SAMPLER_DIM_2D, + HLSL_SAMPLER_DIM_3D, + HLSL_SAMPLER_DIM_CUBE, + HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, + HLSL_SAMPLER_DIM_1DARRAY, + HLSL_SAMPLER_DIM_2DARRAY, + HLSL_SAMPLER_DIM_2DMS, + HLSL_SAMPLER_DIM_2DMSARRAY, + HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_LAST_TEXTURE = HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_BUFFER, + HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, + HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, };
enum hlsl_regset @@ -134,16 +140,17 @@ struct hlsl_type /* Item entry in hlsl_scope->types. hlsl_type->name is used as key (if not NULL). */ struct rb_entry scope_entry;
- enum hlsl_type_class type; + enum hlsl_type_class class; /* If type is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. * If type is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. * Otherwise, base_type is not used. */ enum hlsl_base_type base_type;
/* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. - * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can have any value of the enum. - * If base_type is HLSL_TYPE_UAV, them sampler_dim must be one of HLSL_SAMPLER_DIM_1D, - * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, or HLSL_SAMPLER_DIM_2DARRAY. + * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_TEXTURE. + * If base_type is HLSL_TYPE_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, + * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY, + * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER. * Otherwise, sampler_dim is not used */ enum hlsl_sampler_dim sampler_dim; /* Name, in case the type is a named struct or a typedef. */ @@ -207,6 +214,16 @@ struct hlsl_semantic { const char *name; uint32_t index; + + /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ + bool reported_missing; + /* In case the variable or field that stores this semantic has already reported to use a + * duplicated output semantic, this value stores the last reported index + 1. Otherwise it is 0. */ + uint32_t reported_duplicated_output_next_index; + /* In case the variable or field that stores this semantic has already reported to use a + * duplicated input semantic with incompatible values, this value stores the last reported + * index + 1. Otherwise it is 0. */ + uint32_t reported_duplicated_input_incompatible_next_index; };
/* A field within a struct type declaration, used in hlsl_type.e.fields. */ @@ -228,16 +245,21 @@ struct hlsl_struct_field size_t name_bytecode_offset; };
-/* Information of the register allocated for an instruction node or variable. +/* Information of the register(s) allocated for an instruction node or variable. * These values are initialized at the end of hlsl_emit_bytecode(), after the compilation passes, * just before writing the bytecode. - * For numeric registers, a writemask can be provided to indicate the reservation of only some of the - * 4 components. * The type of register (register class) is implied from its use, so it is not stored in this * struct. */ struct hlsl_reg { + /* Index of the first register allocated. */ uint32_t id; + /* Number of registers to be allocated. + * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ + uint32_t bind_count; + /* For numeric registers, a writemask can be provided to indicate the reservation of only some + * of the 4 components. */ unsigned int writemask; /* Whether the register has been allocated. */ bool allocated; @@ -254,6 +276,7 @@ enum hlsl_ir_node_type HLSL_IR_CONSTANT, HLSL_IR_EXPR, HLSL_IR_IF, + HLSL_IR_INDEX, HLSL_IR_LOAD, HLSL_IR_LOOP, HLSL_IR_JUMP, @@ -342,12 +365,17 @@ struct hlsl_attribute
#define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0
-/* Reservation of a specific register to a variable, field, or buffer, written in the HLSL source - * using the register(·) syntax */ +/* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a + * starting point of their allocation. They are available through the register(·) and the + * packoffset(·) syntaxes, respectively. + * The constant buffer offset is measured in register components. */ struct hlsl_reg_reservation { - char type; - unsigned int index; + char reg_type; + unsigned int reg_index; + + char offset_type; + unsigned int offset_index; };
struct hlsl_ir_var @@ -360,8 +388,7 @@ struct hlsl_ir_var struct hlsl_buffer *buffer; /* Bitfield for storage modifiers (type modifiers are stored in data_type->modifiers). */ unsigned int storage_modifiers; - /* Optional register to be used as a starting point for the variable allocation, specified - * by the user via the register(·) syntax. */ + /* Optional reservations of registers and/or offsets for variables within constant buffers. */ struct hlsl_reg_reservation reg_reservation;
/* Item entry in hlsl_scope.vars. Specifically hlsl_ctx.globals.vars if the variable is global. */ @@ -384,6 +411,13 @@ struct hlsl_ir_var * and the buffer_offset instead. */ struct hlsl_reg regs[HLSL_REGSET_LAST + 1];
+ struct + { + bool used; + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -446,8 +480,8 @@ struct hlsl_ir_if { struct hlsl_ir_node node; struct hlsl_src condition; - struct hlsl_block then_instrs; - struct hlsl_block else_instrs; + struct hlsl_block then_block; + struct hlsl_block else_block; };
struct hlsl_ir_loop @@ -485,6 +519,7 @@ enum hlsl_ir_expr_op HLSL_OP1_SIN, HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_SQRT, + HLSL_OP1_TRUNC,
HLSL_OP2_ADD, HLSL_OP2_BIT_AND, @@ -540,6 +575,12 @@ struct hlsl_ir_swizzle DWORD swizzle; };
+struct hlsl_ir_index +{ + struct hlsl_ir_node node; + struct hlsl_src val, idx; +}; + /* Reference to a variable, or a part of it (e.g. a vector within a matrix within a struct). */ struct hlsl_deref { @@ -574,7 +615,11 @@ enum hlsl_resource_load_type { HLSL_RESOURCE_LOAD, HLSL_RESOURCE_SAMPLE, + HLSL_RESOURCE_SAMPLE_CMP, + HLSL_RESOURCE_SAMPLE_CMP_LZ, HLSL_RESOURCE_SAMPLE_LOD, + HLSL_RESOURCE_SAMPLE_LOD_BIAS, + HLSL_RESOURCE_SAMPLE_GRAD, HLSL_RESOURCE_GATHER_RED, HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, @@ -586,7 +631,8 @@ struct hlsl_ir_resource_load struct hlsl_ir_node node; enum hlsl_resource_load_type load_type; struct hlsl_deref resource, sampler; - struct hlsl_src coords, lod, texel_offset; + struct hlsl_src coords, lod, ddx, ddy, cmp, sample_index, texel_offset; + enum hlsl_sampler_dim sampling_dim; };
struct hlsl_ir_resource_store @@ -607,13 +653,16 @@ struct hlsl_ir_store struct hlsl_ir_constant { struct hlsl_ir_node node; - union hlsl_constant_value + struct hlsl_constant_value { - uint32_t u; - int32_t i; - float f; - double d; - } value[4]; + union hlsl_constant_value_component + { + uint32_t u; + int32_t i; + float f; + double d; + } u[4]; + } value; /* Constant register of type 'c' where the constant value is stored for SM1. */ struct hlsl_reg reg; }; @@ -674,6 +723,9 @@ struct hlsl_buffer unsigned size, used_size; /* Register of type 'b' on which the buffer is allocated. */ struct hlsl_reg reg; + + bool manually_packed_elements; + bool automatically_packed_elements; };
struct hlsl_ctx @@ -744,9 +796,8 @@ struct hlsl_ctx struct hlsl_type *Void; } builtin_types;
- /* List of the instruction nodes for initializing static variables; linked by the - * hlsl_ir_node.entry fields. */ - struct list static_initializers; + /* List of the instruction nodes for initializing static variables. */ + struct hlsl_block static_initializers;
/* Dynamic array of constant values that appear in the shader, associated to the 'c' registers. * Only used for SM1 profiles. */ @@ -780,8 +831,9 @@ struct hlsl_resource_load_params { struct hlsl_type *format; enum hlsl_resource_load_type type; - struct hlsl_deref resource, sampler; - struct hlsl_ir_node *coords, *lod, *texel_offset; + struct hlsl_ir_node *resource, *sampler; + struct hlsl_ir_node *coords, *lod, *ddx, *ddy, *cmp, *sample_index, *texel_offset; + enum hlsl_sampler_dim sampling_dim; };
static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) @@ -850,6 +902,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); }
+static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) +{ + assert(node->type == HLSL_IR_INDEX); + return CONTAINING_RECORD(node, struct hlsl_ir_index, node); +} + +static inline void hlsl_block_init(struct hlsl_block *block) +{ + list_init(&block->instrs); +} + +static inline void hlsl_block_add_instr(struct hlsl_block *block, struct hlsl_ir_node *instr) +{ + list_add_tail(&block->instrs, &instr->entry); +} + +static inline void hlsl_block_add_block(struct hlsl_block *block, struct hlsl_block *add) +{ + list_move_tail(&block->instrs, &add->instrs); +} + static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) { src->node = node; @@ -873,6 +946,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) return ptr; }
+static inline void *hlsl_calloc(struct hlsl_ctx *ctx, size_t count, size_t size) +{ + void *ptr = vkd3d_calloc(count, size); + + if (!ptr) + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return ptr; +} + static inline void *hlsl_realloc(struct hlsl_ctx *ctx, void *ptr, size_t size) { void *ret = vkd3d_realloc(ptr, size); @@ -948,6 +1030,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) switch (dim) { case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: return 1; case HLSL_SAMPLER_DIM_1DARRAY: case HLSL_SAMPLER_DIM_2D: @@ -974,11 +1058,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
-struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);
+void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block);
void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); @@ -986,6 +1071,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
+bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain); bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other);
void hlsl_cleanup_deref(struct hlsl_deref *deref); @@ -1012,64 +1098,73 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size); struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2); -struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc); + const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, const struct vkd3d_shader_location *loc); -struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, - const struct vkd3d_shader_location *loc); -struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, +struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, const struct vkd3d_shader_location *loc); -struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); +struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, + const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *data_type, const struct vkd3d_shader_location *loc); 
-struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, +struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, const struct vkd3d_shader_location *loc); struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); -struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc); -struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, - const struct vkd3d_shader_location *loc); -struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc); +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, + enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct vkd3d_shader_location loc); + const struct vkd3d_shader_location *loc); struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc);
-struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); -struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, +struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); +struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc); -struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs);
-struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc); -struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, +bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); +bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); + +struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + struct hlsl_block *block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); -struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, +struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); -struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, +struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); -struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, +struct hlsl_ir_node *hlsl_new_uint_constant(struct 
hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, - struct vkd3d_shader_location loc); + const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, - const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, const struct hlsl_reg_reservation *reg_reservation);
void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, @@ -1101,6 +1196,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
+const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); +unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); + unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim); unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); @@ -1109,12 +1207,17 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count); +bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index); bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset); unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
+bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); +bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), + struct hlsl_block *block, void *context);
bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); @@ -1124,7 +1227,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); + bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out);
int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index adff1da04d8..e9ae3ccf3d3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -37,6 +37,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); %option bison-locations %option extra-type="struct hlsl_ctx *" %option never-interactive +%option nodefault %option noinput %option nounput %option noyywrap @@ -95,6 +96,7 @@ matrix {return KW_MATRIX; } namespace {return KW_NAMESPACE; } nointerpolation {return KW_NOINTERPOLATION; } out {return KW_OUT; } +packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } PixelShader {return KW_PIXELSHADER; } precise {return KW_PRECISE; } @@ -102,6 +104,8 @@ RasterizerState {return KW_RASTERIZERSTATE; } RenderTargetView {return KW_RENDERTARGETVIEW; } return {return KW_RETURN; } register {return KW_REGISTER; } +RWBuffer {return KW_RWBUFFER; } +RWStructuredBuffer {return KW_RWSTRUCTUREDBUFFER; } RWTexture1D {return KW_RWTEXTURE1D; } RWTexture2D {return KW_RWTEXTURE2D; } RWTexture3D {return KW_RWTEXTURE3D; } @@ -265,6 +269,10 @@ row_major {return KW_ROW_MAJOR; } return STRING; } <pp_line>{WS}+ {} +<pp_line>{ANY} { + FIXME("Malformed preprocessor line directive?\n"); + BEGIN(INITIAL); + } <pp_line>{NEWLINE} { FIXME("Malformed preprocessor line directive?\n"); BEGIN(INITIAL); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index fd1eaf6ec95..0e07fe578e1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -85,8 +85,8 @@ struct parse_function
struct parse_if_body { - struct list *then_instrs; - struct list *else_instrs; + struct list *then_block; + struct list *else_block; };
enum parse_assign_op @@ -164,7 +164,7 @@ static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct src_comp_type = hlsl_type_get_component_type(ctx, src, k); dst_comp_type = hlsl_type_get_component_type(ctx, dst, k);
- if ((src_comp_type->type != HLSL_CLASS_SCALAR || dst_comp_type->type != HLSL_CLASS_SCALAR) + if ((src_comp_type->class != HLSL_CLASS_SCALAR || dst_comp_type->class != HLSL_CLASS_SCALAR) && !hlsl_types_are_equal(src_comp_type, dst_comp_type)) return false; } @@ -196,9 +196,9 @@ static bool type_contains_only_numerics(struct hlsl_type *type) { unsigned int i;
- if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_contains_only_numerics(type->e.array.type); - if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { for (i = 0; i < type->e.record.field_count; ++i) { @@ -207,23 +207,23 @@ static bool type_contains_only_numerics(struct hlsl_type *type) } return true; } - return type->type <= HLSL_CLASS_LAST_NUMERIC; + return type->class <= HLSL_CLASS_LAST_NUMERIC; }
static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { - if (src->type <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) + if (src->class <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) return true;
- if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX + if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX && src->dimx >= dst->dimx && src->dimy >= dst->dimy) return true;
- if ((src->type == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) + if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false;
- if ((dst->type == HLSL_CLASS_MATRIX && dst->dimy > 1) + if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1) && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false;
@@ -232,10 +232,10 @@ static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ
static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { - if ((src->type <= HLSL_CLASS_LAST_NUMERIC) != (dst->type <= HLSL_CLASS_LAST_NUMERIC)) + if ((src->class <= HLSL_CLASS_LAST_NUMERIC) != (dst->class <= HLSL_CLASS_LAST_NUMERIC)) return false;
- if (src->type <= HLSL_CLASS_LAST_NUMERIC) + if (src->class <= HLSL_CLASS_LAST_NUMERIC) { /* Scalar vars can be converted to any other numeric data type */ if (src->dimx == 1 && src->dimy == 1) @@ -244,21 +244,21 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ if (dst->dimx == 1 && dst->dimy == 1) return true;
- if (src->type == HLSL_CLASS_MATRIX || dst->type == HLSL_CLASS_MATRIX) + if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX) { - if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX) + if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX) return src->dimx >= dst->dimx && src->dimy >= dst->dimy;
/* Matrix-vector conversion is apparently allowed if they have * the same components count, or if the matrix is 1xN or Nx1 * and we are reducing the component count */ - if (src->type == HLSL_CLASS_VECTOR || dst->type == HLSL_CLASS_VECTOR) + if (src->class == HLSL_CLASS_VECTOR || dst->class == HLSL_CLASS_VECTOR) { if (hlsl_type_component_count(src) == hlsl_type_component_count(dst)) return true;
- if ((src->type == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && - (dst->type == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) + if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && + (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); }
@@ -273,19 +273,19 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ return hlsl_types_are_componentwise_equal(ctx, src, dst); }
-static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc);
static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; - struct hlsl_ir_expr *cast; + struct hlsl_ir_node *cast;
if (hlsl_types_are_equal(src_type, dst_type)) return node;
- if (src_type->type > HLSL_CLASS_VECTOR || dst_type->type > HLSL_CLASS_VECTOR) + if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) { unsigned int src_comp_count = hlsl_type_component_count(src_type); unsigned int dst_comp_count = hlsl_type_component_count(dst_type); @@ -295,9 +295,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var; unsigned int dst_idx;
- broadcast = src_type->type <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; + broadcast = src_type->class <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; matrix_cast = !broadcast && dst_comp_count != src_comp_count - && src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX; + && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; assert(src_comp_count >= dst_comp_count || broadcast); if (matrix_cast) { @@ -311,8 +311,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs,
for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) { + struct hlsl_ir_node *component_load; struct hlsl_type *dst_comp_type; - struct hlsl_ir_store *store; struct hlsl_block block; unsigned int src_idx;
@@ -333,19 +333,19 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs,
dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);
- if (!(load = add_load_component(ctx, instrs, node, src_idx, loc))) + if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) return NULL;
- if (!(cast = hlsl_new_cast(ctx, &load->node, dst_comp_type, loc))) + if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) return NULL; - list_add_tail(instrs, &cast->node.entry); + list_add_tail(instrs, &cast->entry);
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, &cast->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) return NULL; list_move_tail(instrs, &block.instrs); }
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; list_add_tail(instrs, &load->node.entry);
@@ -355,8 +355,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, { if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) return NULL; - list_add_tail(instrs, &cast->node.entry); - return &cast->node; + list_add_tail(instrs, &cast->entry); + return cast; } }
@@ -384,19 +384,20 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct
if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->type == HLSL_CLASS_VECTOR ? "vector" : "matrix"); + src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix");
return add_cast(ctx, instrs, node, dst_type, loc); }
-static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, const struct vkd3d_shader_location loc) +static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, + const struct vkd3d_shader_location *loc) { if (modifiers & mod) { struct vkd3d_string_buffer *string;
if ((string = hlsl_modifiers_to_string(ctx, mod))) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Modifier '%s' was already specified.", string->buffer); hlsl_release_string_buffer(ctx, string); return modifiers; @@ -406,26 +407,27 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, con
static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) { - struct hlsl_ir_node *condition, *not; - struct hlsl_ir_jump *jump; - struct hlsl_ir_if *iff; + struct hlsl_ir_node *condition, *not, *iff, *jump; + struct hlsl_block then_block;
/* E.g. "for (i = 0; ; ++i)". */ if (list_empty(cond_list)) return true;
condition = node_from_list(cond_list); - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, condition->loc))) + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) return false; list_add_tail(cond_list, &not->entry);
- if (!(iff = hlsl_new_if(ctx, not, condition->loc))) + hlsl_block_init(&then_block); + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) return false; - list_add_tail(cond_list, &iff->node.entry); + hlsl_block_add_instr(&then_block, jump);
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, condition->loc))) + if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) return false; - list_add_head(&iff->then_instrs.instrs, &jump->node.entry); + list_add_tail(cond_list, &iff->entry); return true; }
@@ -436,46 +438,87 @@ enum loop_type LOOP_DO_WHILE };
-static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, struct list *init, struct list *cond, - struct list *iter, struct list *body, struct vkd3d_shader_location loc) +static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs) { - struct list *list = NULL; - struct hlsl_ir_loop *loop = NULL; - struct hlsl_ir_if *cond_jump = NULL; + unsigned int i, j;
- if (!(list = make_empty_list(ctx))) - goto oom; + for (i = 0; i < attrs->count; ++i) + { + for (j = i + 1; j < attrs->count; ++j) + { + if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) + return true; + } + } + + return false; +} + +static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, + struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) +{ + struct hlsl_block body_block; + struct hlsl_ir_node *loop; + unsigned int i; + + if (attribute_list_has_duplicates(attributes)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute.");
- if (init) - list_move_head(list, init); + /* Ignore unroll(0) attribute, and any invalid attribute. */ + for (i = 0; i < attributes->count; ++i) + { + const struct hlsl_attribute *attr = attributes->attrs[i]; + if (!strcmp(attr->name, "unroll")) + { + if (attr->args_count) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); + } + else + { + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); + } + } + else if (!strcmp(attr->name, "loop") + || !strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) + { + hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); + } + else + { + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); + } + }
- if (!(loop = hlsl_new_loop(ctx, loc))) + if (!init && !(init = make_empty_list(ctx))) goto oom; - list_add_tail(list, &loop->node.entry);
if (!append_conditional_break(ctx, cond)) goto oom;
+ hlsl_block_init(&body_block); + if (type != LOOP_DO_WHILE) - list_move_tail(&loop->body.instrs, cond); + list_move_tail(&body_block.instrs, cond);
- list_move_tail(&loop->body.instrs, body); + list_move_tail(&body_block.instrs, body);
if (iter) - list_move_tail(&loop->body.instrs, iter); + list_move_tail(&body_block.instrs, iter);
if (type == LOOP_DO_WHILE) - list_move_tail(&loop->body.instrs, cond); + list_move_tail(&body_block.instrs, cond); + + if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) + goto oom; + list_add_tail(init, &loop->entry);
- vkd3d_free(init); vkd3d_free(cond); vkd3d_free(body); - return list; + return init;
oom: - vkd3d_free(loop); - vkd3d_free(cond_jump); - vkd3d_free(list); destroy_instr_list(init); destroy_instr_list(cond); destroy_instr_list(iter); @@ -500,14 +543,14 @@ static void free_parse_initializer(struct parse_initializer *initializer) vkd3d_free(initializer->args); }
-static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, +static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, struct vkd3d_shader_location *loc) { unsigned int len = strlen(swizzle), component = 0; unsigned int i, set, swiz = 0; bool valid;
- if (value->data_type->type == HLSL_CLASS_MATRIX) + if (value->data_type->class == HLSL_CLASS_MATRIX) { /* Matrix swizzle */ bool m_swizzle; @@ -582,224 +625,102 @@ static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_ return NULL; }
-static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *return_value, struct vkd3d_shader_location loc) +static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; - struct hlsl_ir_jump *jump; + struct hlsl_ir_node *jump;
if (ctx->cur_function->return_var) { if (return_value) { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store;
- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) - return NULL; + if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) + return false;
if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) - return NULL; - list_add_after(&return_value->entry, &store->node.entry); + return false; + list_add_after(&return_value->entry, &store->entry); } else { - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); - return NULL; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); + return false; } } else { if (return_value) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); }
if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) - return NULL; - list_add_tail(instrs, &jump->node.entry); - - return jump; -} - -static struct hlsl_ir_load *add_load_index(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) -{ - const struct hlsl_deref *src; - struct hlsl_ir_load *load; - - if (var_instr->type == HLSL_IR_LOAD) - { - src = &hlsl_ir_load(var_instr)->src; - } - else - { - struct hlsl_ir_store *store; - struct hlsl_ir_var *var; - - if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) - return NULL; - - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - src = &store->lhs; - } - - if (!(load = hlsl_new_load_index(ctx, src, idx, loc))) - return NULL; - list_add_tail(instrs, &load->node.entry); + return false; + list_add_tail(instrs, &jump->entry);
- return load; + return true; }
-static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) { - const struct hlsl_deref *src; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load, *store; struct hlsl_block block; + struct hlsl_ir_var *var; + struct hlsl_deref src;
- if (var_instr->type == HLSL_IR_LOAD) - { - src = &hlsl_ir_load(var_instr)->src; - } - else - { - struct hlsl_ir_store *store; - struct hlsl_ir_var *var; - - if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) - return NULL; - - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; - list_add_tail(instrs, &store->node.entry); + if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) + return NULL;
- src = &store->lhs; - } + if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) + return NULL; + list_add_tail(instrs, &store->entry);
- if (!(load = hlsl_new_load_component(ctx, &block, src, comp, loc))) + hlsl_init_simple_deref_from_var(&src, var); + if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) return NULL; list_move_tail(instrs, &block.instrs);
return load; }
-static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, - unsigned int idx, const struct vkd3d_shader_location loc) +static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, + unsigned int idx, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *index, *c;
assert(idx < record->data_type->e.record.field_count);
- if (!(c = hlsl_new_uint_constant(ctx, idx, &loc))) - return false; - list_add_tail(instrs, &c->node.entry); - - return !!add_load_index(ctx, instrs, record, &c->node, &loc); -} - -static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc); - -static bool add_matrix_index(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *matrix, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) -{ - struct hlsl_type *mat_type = matrix->data_type, *ret_type; - struct hlsl_deref var_deref; - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - unsigned int i; - - if (hlsl_type_is_row_major(mat_type)) - return add_load_index(ctx, instrs, matrix, index, loc); - - ret_type = hlsl_get_vector_type(ctx, mat_type->base_type, mat_type->dimx); - - if (!(var = hlsl_new_synthetic_var(ctx, "index", ret_type, loc))) + if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) return false; - hlsl_init_simple_deref_from_var(&var_deref, var); - - for (i = 0; i < mat_type->dimx; ++i) - { - struct hlsl_ir_load *column, *value; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; - struct hlsl_block block; - - if (!(c = hlsl_new_uint_constant(ctx, i, loc))) - return false; - list_add_tail(instrs, &c->node.entry); - - if (!(column = add_load_index(ctx, instrs, matrix, &c->node, loc))) - return false; - - if (!(value = add_load_index(ctx, instrs, &column->node, index, loc))) - return false; + list_add_tail(instrs, &c->entry);
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i, &value->node))) - return false; - list_move_tail(instrs, &block.instrs); - } - - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(index = hlsl_new_index(ctx, record, c, loc))) return false; - list_add_tail(instrs, &load->node.entry); + list_add_tail(instrs, &index->entry);
return true; }
-static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *index, unsigned int dim_count, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_load *coords_load; - struct hlsl_deref coords_deref; - struct hlsl_ir_constant *zero; - struct hlsl_ir_store *store; - struct hlsl_ir_var *coords; - - if (!(coords = hlsl_new_synthetic_var(ctx, "coords", - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) - return NULL; - - hlsl_init_simple_deref_from_var(&coords_deref, coords); - if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) - return NULL; - list_add_tail(instrs, &zero->node.entry); - - if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, &zero->node, 1u << dim_count, loc))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - if (!(coords_load = hlsl_new_var_load(ctx, coords, *loc))) - return NULL; - list_add_tail(instrs, &coords_load->node.entry); - - return &coords_load->node; -} +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc);
-static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, +static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; - struct hlsl_ir_expr *cast; + struct hlsl_ir_node *return_index, *cast;
- if (expr_type->type == HLSL_CLASS_OBJECT + if (expr_type->class == HLSL_CLASS_OBJECT && (expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); - /* Only HLSL_IR_LOAD can return an object. */ - struct hlsl_ir_load *object_load = hlsl_ir_load(array); - struct hlsl_ir_resource_load *resource_load;
- if (index_type->type > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) + if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) { struct vkd3d_string_buffer *string;
@@ -814,20 +735,14 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) return false;
- if (!(index = add_zero_mipmap_level(ctx, instrs, index, dim_count, loc))) + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; + list_add_tail(instrs, &return_index->entry);
- load_params.format = expr_type->e.resource_format; - load_params.resource = object_load->src; - load_params.coords = index; - - if (!(resource_load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &resource_load->node.entry); return true; }
- if (index_type->type != HLSL_CLASS_SCALAR) + if (index_type->class != HLSL_CLASS_SCALAR) { hlsl_error(ctx, &index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Array index is not scalar."); return false; @@ -835,23 +750,21 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls
if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) return false; - list_add_tail(instrs, &cast->node.entry); - index = &cast->node; + list_add_tail(instrs, &cast->entry); + index = cast;
- if (expr_type->type == HLSL_CLASS_MATRIX) - return add_matrix_index(ctx, instrs, array, index, loc); - - if (expr_type->type != HLSL_CLASS_ARRAY && expr_type->type != HLSL_CLASS_VECTOR) + if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) { - if (expr_type->type == HLSL_CLASS_SCALAR) + if (expr_type->class == HLSL_CLASS_SCALAR) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Scalar expressions cannot be array-indexed."); else hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Expression cannot be array-indexed."); return false; }
- if (!add_load_index(ctx, instrs, array, index, loc)) + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; + list_add_tail(instrs, &return_index->entry);
return true; } @@ -877,12 +790,12 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_ctx *ctx, struct hlsl_
if (!(*modifiers & HLSL_MODIFIERS_MAJORITY_MASK) && !(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK) - && type->type == HLSL_CLASS_MATRIX) + && type->class == HLSL_CLASS_MATRIX) { if (!(default_majority = ctx->matrix_majority) && force_majority) default_majority = HLSL_MODIFIER_COLUMN_MAJOR; } - else if (type->type != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) + else if (type->class != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "'row_major' and 'column_major' modifiers are only allowed for matrices."); @@ -923,7 +836,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct parse_variable_def *v, *v_next; size_t i = 0;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
memset(fields, 0, sizeof(*fields)); @@ -939,7 +852,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields,
field->type = type;
- if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { for (k = 0; k < v->arrays.count; ++k) unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -983,6 +896,9 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Illegal initializer on a struct field."); free_parse_initializer(&v->initializer); } + if (v->reg_reservation.offset_type) + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed inside struct definitions."); vkd3d_free(v); } vkd3d_free(defs); @@ -1052,18 +968,23 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, }
static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, - struct parse_parameter *param, const struct vkd3d_shader_location loc) + struct parse_parameter *param, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var;
- if (param->type->type == HLSL_CLASS_MATRIX) + if (param->type->class == HLSL_CLASS_MATRIX) assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name);
- if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, &param->semantic, param->modifiers, &param->reg_reservation))) + if (param->reg_reservation.offset_type) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on function parameters."); + + if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, &param->semantic, param->modifiers, + &param->reg_reservation))) return false; var->is_param = 1;
@@ -1084,12 +1005,61 @@ static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) { struct hlsl_reg_reservation reservation = {0};
- if (!sscanf(reg_string + 1, "%u", &reservation.index)) + if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) { FIXME("Unsupported register reservation syntax.\n"); return reservation; } - reservation.type = reg_string[0]; + reservation.reg_type = ascii_tolower(reg_string[0]); + return reservation; +} + +static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, + const char *swizzle, const struct vkd3d_shader_location *loc) +{ + struct hlsl_reg_reservation reservation = {0}; + char *endptr; + + if (ctx->profile->major_version < 4) + return reservation; + + reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); + if (*endptr) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() syntax."); + return reservation; + } + + reservation.offset_type = ascii_tolower(reg_string[0]); + if (reservation.offset_type != 'c') + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Only 'c' registers are allowed in packoffset()."); + return reservation; + } + + reservation.offset_index *= 4; + + if (swizzle) + { + if (strlen(swizzle) != 1) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() component \"%s\".", swizzle); + + if (swizzle[0] == 'x' || swizzle[0] == 'r') + reservation.offset_index += 0; + else if (swizzle[0] == 'y' || swizzle[0] == 'g') + reservation.offset_index += 1; + else if (swizzle[0] == 'z' || swizzle[0] == 'b') + reservation.offset_index += 2; + else if (swizzle[0] == 'w' || swizzle[0] == 'a') + reservation.offset_index += 3; + else + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() component \"%s\".", swizzle); + } + return reservation; }
@@ -1122,53 +1092,37 @@ static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) return list; }
-static unsigned int evaluate_static_expression(struct hlsl_ir_node *node) +static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) { - if (node->data_type->type != HLSL_CLASS_SCALAR) + struct hlsl_ir_constant *constant; + struct hlsl_ir_node *node; + unsigned int ret = 0; + bool progress; + + if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) return 0;
- switch (node->type) + do { - case HLSL_IR_CONSTANT: - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(node); - const union hlsl_constant_value *value = &constant->value[0]; - - switch (constant->node.data_type->base_type) - { - case HLSL_TYPE_UINT: - return value->u; - case HLSL_TYPE_INT: - return value->i; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return value->f; - case HLSL_TYPE_DOUBLE: - return value->d; - case HLSL_TYPE_BOOL: - return !!value->u; - default: - vkd3d_unreachable(); - } - } - - case HLSL_IR_EXPR: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_SWIZZLE: - FIXME("Unhandled type %s.\n", hlsl_node_type_to_string(node->type)); - return 0; + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); + progress |= hlsl_copy_propagation_execute(ctx, block); + } while (progress);
- case HLSL_IR_CALL: - case HLSL_IR_IF: - case HLSL_IR_JUMP: - case HLSL_IR_LOOP: - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - vkd3d_unreachable(); + node = node_from_list(&block->instrs); + if (node->type == HLSL_IR_CONSTANT) + { + constant = hlsl_ir_constant(node); + ret = constant->value.u[0].u; + } + else + { + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Failed to evaluate constant expression %d.", node->type); }
- vkd3d_unreachable(); + return ret; }
static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) @@ -1180,20 +1134,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) return true;
- if (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_VECTOR) + if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) return true;
- if (t1->type == HLSL_CLASS_MATRIX || t2->type == HLSL_CLASS_MATRIX) + if (t1->class == HLSL_CLASS_MATRIX || t2->class == HLSL_CLASS_MATRIX) { /* Matrix-vector conversion is apparently allowed if either they have the same components count or the matrix is nx1 or 1xn */ - if (t1->type == HLSL_CLASS_VECTOR || t2->type == HLSL_CLASS_VECTOR) + if (t1->class == HLSL_CLASS_VECTOR || t2->class == HLSL_CLASS_VECTOR) { if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) return true;
- return (t1->type == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) - || (t2->type == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); + return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) + || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); }
/* Both matrices */ @@ -1226,7 +1180,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, const struct vkd3d_shader_location *loc, enum hlsl_type_class *type, unsigned int *dimx, unsigned int *dimy) { - if (t1->type > HLSL_CLASS_LAST_NUMERIC) + if (t1->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string;
@@ -1237,7 +1191,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return false; }
- if (t2->type > HLSL_CLASS_LAST_NUMERIC) + if (t2->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string;
@@ -1264,17 +1218,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct
if (t1->dimx == 1 && t1->dimy == 1) { - *type = t2->type; + *type = t2->class; *dimx = t2->dimx; *dimy = t2->dimy; } else if (t2->dimx == 1 && t2->dimy == 1) { - *type = t1->type; + *type = t1->class; *dimx = t1->dimx; *dimy = t1->dimy; } - else if (t1->type == HLSL_CLASS_MATRIX && t2->type == HLSL_CLASS_MATRIX) + else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX) { *type = HLSL_CLASS_MATRIX; *dimx = min(t1->dimx, t2->dimx); @@ -1284,13 +1238,13 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct { if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) { - *type = t1->type; + *type = t1->class; *dimx = t1->dimx; *dimy = t1->dimy; } else { - *type = t2->type; + *type = t2->class; *dimx = t2->dimx; *dimy = t2->dimy; } @@ -1306,55 +1260,50 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *expr; unsigned int i;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { - struct hlsl_type *vector_type; + struct hlsl_type *scalar_type; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load; struct hlsl_ir_var *var;
- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + scalar_type = hlsl_get_scalar_type(ctx, type->base_type);
if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) return NULL; hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < hlsl_type_major_size(type); ++i) + for (i = 0; i < type->dimy * type->dimx; ++i) { - struct hlsl_ir_node *value, *vector_operands[HLSL_MAX_OPERANDS] = { NULL }; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; + struct hlsl_block block; unsigned int j;
- if (!(c = hlsl_new_uint_constant(ctx, i, loc))) - return NULL; - list_add_tail(instrs, &c->node.entry); - for (j = 0; j < HLSL_MAX_OPERANDS; j++) { if (operands[j]) { - struct hlsl_ir_load *load; - - if (!(load = add_load_index(ctx, instrs, operands[j], &c->node, loc))) + if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) return NULL; - vector_operands[j] = &load->node; + + cell_operands[j] = load; } }
- if (!(value = add_expr(ctx, instrs, op, vector_operands, vector_type, loc))) + if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) return NULL;
- if (!(store = hlsl_new_store_index(ctx, &var_deref, &c->node, value, 0, loc))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) return NULL; - list_add_tail(instrs, &store->node.entry); + list_move_tail(instrs, &block.instrs); }
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &load->node.entry); + list_add_tail(instrs, &var_load->node.entry);
- return &load->node; + return &var_load->node; }
if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) @@ -1407,7 +1356,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *bool_type;
- bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL, + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy);
if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) @@ -1416,20 +1365,27 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct return add_expr(ctx, instrs, op, args, bool_type, loc); }
-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) +static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, + const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { - struct hlsl_type *common_type; enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); enum hlsl_type_class type; unsigned int dimx, dimy; - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) return NULL;
- common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); +} + +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *common_type; + + common_type = get_common_numeric_type(ctx, arg1, arg2, loc);
if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) return NULL; @@ -1441,13 +1397,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str }
static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2);
list_move_tail(list1, list2); vkd3d_free(list2); - add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, &loc); + add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); return list1; }
@@ -1499,13 +1455,13 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str }
static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location loc) + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2);
list_move_tail(list1, list2); vkd3d_free(list2); - add_binary_comparison_expr(ctx, list1, op, arg1, arg2, &loc); + add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); return list1; }
@@ -1596,7 +1552,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis enum hlsl_ir_expr_op op; unsigned dim;
- if (arg1->data_type->type == HLSL_CLASS_MATRIX) + if (arg1->data_type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -1607,7 +1563,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return NULL; }
- if (arg2->data_type->type == HLSL_CLASS_MATRIX) + if (arg2->data_type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -1618,9 +1574,9 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return NULL; }
- if (arg1->data_type->type == HLSL_CLASS_SCALAR) + if (arg1->data_type->class == HLSL_CLASS_SCALAR) dim = arg2->data_type->dimx; - else if (arg2->data_type->type == HLSL_CLASS_SCALAR) + else if (arg2->data_type->class == HLSL_CLASS_SCALAR) dim = arg1->data_type->dimx; else dim = min(arg1->data_type->dimx, arg2->data_type->dimx); @@ -1702,7 +1658,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; - struct hlsl_ir_expr *copy; + struct hlsl_ir_node *copy; unsigned int writemask = 0;
if (assign_op == ASSIGN_OP_SUB) @@ -1720,13 +1676,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in return NULL; }
- if (lhs_type->type <= HLSL_CLASS_LAST_NUMERIC) + if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) writemask = (1 << lhs_type->dimx) - 1;
if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) return NULL;
- while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_RESOURCE_LOAD) + while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) { if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) { @@ -1735,10 +1691,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } else if (lhs->type == HLSL_IR_SWIZZLE) { - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs), *new_swizzle; + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); unsigned int width, s = swizzle->swizzle; + struct hlsl_ir_node *new_swizzle;
- if (lhs->data_type->type == HLSL_CLASS_MATRIX) + if (lhs->data_type->class == HLSL_CLASS_MATRIX) hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask.");
if (!invert_swizzle(&s, &writemask, &width)) @@ -1751,10 +1708,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { return NULL; } - list_add_tail(instrs, &new_swizzle->node.entry); + list_add_tail(instrs, &new_swizzle->entry);
lhs = swizzle->val.node; - rhs = &new_swizzle->node; + rhs = new_swizzle; } else { @@ -1763,18 +1720,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } }
- if (lhs->type == HLSL_IR_RESOURCE_LOAD) + if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_resource_access(hlsl_ir_index(lhs))) { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(lhs); - struct hlsl_ir_resource_store *store; + struct hlsl_ir_node *coords = hlsl_ir_index(lhs)->idx.node; + struct hlsl_deref resource_deref; struct hlsl_type *resource_type; - struct hlsl_ir_swizzle *coords; + struct hlsl_ir_node *store; unsigned int dim_count;
- /* Such an lvalue was produced by an index expression. */ - assert(load->load_type == HLSL_RESOURCE_LOAD); - resource_type = hlsl_deref_get_type(ctx, &load->resource); - assert(resource_type->type == HLSL_CLASS_OBJECT); + if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, hlsl_ir_index(lhs)->val.node)) + return NULL; + + resource_type = hlsl_deref_get_type(ctx, &resource_deref); + assert(resource_type->class == HLSL_CLASS_OBJECT); assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV);
if (resource_type->base_type != HLSL_TYPE_UAV) @@ -1787,25 +1745,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components.");
- /* Remove the (implicit) mipmap level from the load expression. */ - assert(load->coords.node->data_type->type == HLSL_CLASS_VECTOR); - assert(load->coords.node->data_type->base_type == HLSL_TYPE_UINT); - assert(load->coords.node->data_type->dimx == dim_count + 1); - if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dim_count, load->coords.node, &lhs->loc))) - return NULL; - list_add_tail(instrs, &coords->node.entry); + assert(coords->data_type->class == HLSL_CLASS_VECTOR); + assert(coords->data_type->base_type == HLSL_TYPE_UINT); + assert(coords->data_type->dimx == dim_count);
- if (!(store = hlsl_new_resource_store(ctx, &load->resource, &coords->node, rhs, &lhs->loc))) + if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) + { + hlsl_cleanup_deref(&resource_deref); return NULL; - list_add_tail(instrs, &store->node.entry); + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&resource_deref); + } + else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) + { + struct hlsl_ir_index *row = hlsl_ir_index(lhs); + struct hlsl_ir_node *mat = row->val.node; + unsigned int i, k = 0; + + for (i = 0; i < mat->data_type->dimx; ++i) + { + struct hlsl_ir_node *cell, *load, *store, *c; + struct hlsl_deref deref; + + if (!(writemask & (1 << i))) + continue; + + if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) + return NULL; + list_add_tail(instrs, &c->entry); + + if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) + return NULL; + list_add_tail(instrs, &cell->entry); + + if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) + return NULL; + + if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) + return NULL; + + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load, 0, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&deref); + } } else { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; + struct hlsl_deref deref;
- if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) + if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) return NULL; - list_add_tail(instrs, &store->node.entry); + + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&deref); }
/* Don't use the instruction itself as a source, as this makes structure @@ -1813,37 +1816,37 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in * the last instruction in the list, we do need to copy. */ if (!(copy = hlsl_new_copy(ctx, rhs))) return NULL; - list_add_tail(instrs, &copy->node.entry); - return &copy->node; + list_add_tail(instrs, &copy->entry); + return copy; }
static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, - struct vkd3d_shader_location loc) + const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *lhs = node_from_list(instrs); - struct hlsl_ir_constant *one; + struct hlsl_ir_node *one;
if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in");
- if (!(one = hlsl_new_int_constant(ctx, 1, &loc))) + if (!(one = hlsl_new_int_constant(ctx, 1, loc))) return false; - list_add_tail(instrs, &one->node.entry); + list_add_tail(instrs, &one->entry);
- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, &one->node)) + if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) return false;
if (post) { - struct hlsl_ir_expr *copy; + struct hlsl_ir_node *copy;
if (!(copy = hlsl_new_copy(ctx, lhs))) return false; - list_add_tail(instrs, &copy->node.entry); + list_add_tail(instrs, &copy->entry);
/* Post increment/decrement expressions are considered const. */ - if (!(copy->node.data_type = hlsl_type_clone(ctx, copy->node.data_type, 0, HLSL_MODIFIER_CONST))) + if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) return false; }
@@ -1861,10 +1864,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
for (k = 0; k < src_comp_count; ++k) { + struct hlsl_ir_node *conv, *load; struct hlsl_type *dst_comp_type; - struct hlsl_ir_store *store; - struct hlsl_ir_load *load; - struct hlsl_ir_node *conv; struct hlsl_block block;
if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) @@ -1872,10 +1873,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index);
- if (!(conv = add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc))) + if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) return;
- if (!(store = hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv))) + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) return; list_move_tail(instrs, &block.instrs);
@@ -1885,12 +1886,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) { - if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_OBJECT) return !must_be_in_struct; - if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_has_object_components(type->e.array.type, must_be_in_struct);
- if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { unsigned int i;
@@ -1905,12 +1906,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s
static bool type_has_numeric_components(struct hlsl_type *type) { - if (type->type <= HLSL_CLASS_LAST_NUMERIC) + if (type->class <= HLSL_CLASS_LAST_NUMERIC) return true; - if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_has_numeric_components(type->e.array.type);
- if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { unsigned int i;
@@ -1934,7 +1935,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t struct hlsl_type *type; bool local = true;
- if (basic_type->type == HLSL_CLASS_MATRIX) + if (basic_type->class == HLSL_CLASS_MATRIX) assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
if (!(statements_list = make_empty_list(ctx))) @@ -1966,7 +1967,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t
type = basic_type;
- if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { for (i = 0; i < v->arrays.count; ++i) unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -2035,7 +2036,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } vkd3d_free(v->arrays.sizes);
- if (!(var = hlsl_new_var(ctx, v->name, type, v->loc, &v->semantic, modifiers, &v->reg_reservation))) + if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) { free_parse_variable_def(v); continue; @@ -2043,6 +2044,13 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t
var->buffer = ctx->cur_buffer;
+ if (var->buffer == ctx->globals_buffer) + { + if (var->reg_reservation.offset_type) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is only allowed inside constant buffer declarations."); + } + if (ctx->cur_scope == ctx->globals) { local = false; @@ -2148,7 +2156,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else { - struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, var->loc); + struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc);
assert(v->initializer.args_count == 1); list_add_tail(v->initializer.instrs, &load->node.entry); @@ -2156,7 +2164,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t }
if (modifiers & HLSL_STORAGE_STATIC) - list_move_tail(&ctx->static_initializers, v->initializer.instrs); + list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); else list_move_tail(statements_list, v->initializer.instrs); vkd3d_free(v->initializer.args); @@ -2164,9 +2172,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { - struct hlsl_ir_constant *zero; - struct hlsl_ir_store *store; - struct hlsl_ir_node *cast; + struct hlsl_ir_node *cast, *store, *zero;
/* Initialize statics to zero by default. */
@@ -2181,9 +2187,9 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t vkd3d_free(v); continue; } - list_add_tail(&ctx->static_initializers, &zero->node.entry); + hlsl_block_add_instr(&ctx->static_initializers, zero);
- if (!(cast = add_cast(ctx, &ctx->static_initializers, &zero->node, var->data_type, &var->loc))) + if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) { vkd3d_free(v); continue; @@ -2194,7 +2200,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t vkd3d_free(v); continue; } - list_add_tail(&ctx->static_initializers, &store->node.entry); + hlsl_block_add_instr(&ctx->static_initializers, store); } vkd3d_free(v); } @@ -2279,7 +2285,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) return arg;
- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return add_implicit_conversion(ctx, params->instrs, arg, type, loc); }
@@ -2315,12 +2321,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *
base = expr_common_base_type(base, arg_type->base_type);
- if (arg_type->type == HLSL_CLASS_VECTOR) + if (arg_type->class == HLSL_CLASS_VECTOR) { vectors = true; dimx = min(dimx, arg_type->dimx); } - else if (arg_type->type == HLSL_CLASS_MATRIX) + else if (arg_type->class == HLSL_CLASS_MATRIX) { matrices = true; dimx = min(dimx, arg_type->dimx); @@ -2369,7 +2375,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false;
- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
return convert_args(ctx, params, type, loc); } @@ -2383,20 +2389,18 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg = params->args[0], *mul; - struct hlsl_ir_constant *one, *zero; - struct hlsl_ir_load *load; + struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; unsigned int i, count;
if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) return false; - list_add_tail(params->instrs, &one->node.entry); + list_add_tail(params->instrs, &one->entry);
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->node.entry); + list_add_tail(params->instrs, &zero->entry);
- mul = &one->node; + mul = one;
count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) @@ -2404,52 +2408,123 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) return false;
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &load->node, mul, loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) return false; }
- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, &zero->node, loc); -} - -/* Find the type corresponding to the given source type, with the same - * dimensions but a different base type. */ -static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) -{ - return hlsl_get_numeric_type(ctx, type->type, base_type, type->dimx, type->dimy); + return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); }
-static bool intrinsic_asuint(struct hlsl_ctx *ctx, +static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *data_type; + struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; + unsigned int i, count;
- if (params->args_count != 1 && params->args_count != 3) + if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); + hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); return false; }
- if (params->args_count == 3) + if (arg->data_type->base_type == HLSL_TYPE_FLOAT) { - hlsl_fixme(ctx, loc, "Double-to-integer conversion."); - return false; - } + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + return false; + list_add_tail(params->instrs, &zero->entry);
- data_type = params->args[0]->data_type; - if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) - { - struct vkd3d_string_buffer *string; + if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) + return false;
- if ((string = hlsl_type_to_string(ctx, data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", - string->buffer); - hlsl_release_string_buffer(ctx, string); + return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); } - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); + else if (arg->data_type->base_type == HLSL_TYPE_BOOL) + { + if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) + return false; + list_add_tail(params->instrs, &bfalse->entry); + + or = bfalse; + + count = hlsl_type_component_count(arg->data_type); + for (i = 0; i < count; ++i) + { + if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + + if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) + return false; + } + + return true; + } + + hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); + return false; +} + +/* Find the type corresponding to the given source type, with the same + * dimensions but a different base type. 
*/ +static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) +{ + return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +} + +static bool intrinsic_asfloat(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *data_type; + + data_type = params->args[0]->data_type; + if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong argument type of asfloat(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); +} + +static bool intrinsic_asuint(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *data_type; + + if (params->args_count != 1 && params->args_count != 3) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); + return false; + } + + if (params->args_count == 3) + { + hlsl_fixme(ctx, loc, "Double-to-integer conversion."); + return false; + } + + data_type = params->args[0]->data_type; + if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT);
operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); @@ -2483,7 +2558,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_swizzle *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; + struct hlsl_ir_node *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1]; struct hlsl_ir_node *arg1_cast, *arg2_cast, *mul1_neg, *mul1, *mul2; struct hlsl_type *cast_type; @@ -2504,35 +2579,55 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl1->node.entry); + list_add_tail(params->instrs, &arg1_swzl1->entry);
if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl1->node.entry); + list_add_tail(params->instrs, &arg2_swzl1->entry);
- if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, - &arg1_swzl1->node, &arg2_swzl1->node, loc))) + if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) return false;
- if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, *loc))) + if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) return false; list_add_tail(params->instrs, &mul1_neg->entry);
if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl2->node.entry); + list_add_tail(params->instrs, &arg1_swzl2->entry);
if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl2->node.entry); + list_add_tail(params->instrs, &arg2_swzl2->entry);
- if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, - &arg1_swzl2->node, &arg2_swzl2->node, loc))) + if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) return false;
return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, mul2, mul1_neg, loc); }
+static bool intrinsic_ddx(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); +} + +static bool intrinsic_ddy(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2565,8 +2660,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, static bool intrinsic_exp(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *coeff; - struct hlsl_ir_node *arg, *mul; + struct hlsl_ir_node *arg, *mul, *coeff;
if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) return false; @@ -2574,9 +2668,9 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; - list_add_tail(params->instrs, &coeff->node.entry); + list_add_tail(params->instrs, &coeff->entry);
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &coeff->node, params->args[0], loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) return false;
return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); @@ -2604,6 +2698,43 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); }
+static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *x, *y, *div, *abs, *frac, *neg_frac, *ge, *select, *zero; + static const struct hlsl_constant_value zero_value; + + if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) + return false; + + if (!(div = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, x, y, loc))) + return false; + + if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) + return false; + list_add_tail(params->instrs, &zero->entry); + + if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) + return false; + + if (!(frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FRACT, abs, loc))) + return false; + + if (!(neg_frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, frac, loc))) + return false; + + if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) + return false; + + if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); +} + static bool intrinsic_frac(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2635,7 +2766,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *arg, *dot;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -2692,20 +2823,18 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, static bool intrinsic_lit(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow; - struct hlsl_ir_constant *init, *zero; - struct hlsl_ir_node *n_l, *n_h, *m; - struct hlsl_ir_node *diffuse; - struct hlsl_ir_store *store; + struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow, *load; + struct hlsl_ir_node *n_l, *n_h, *m, *diffuse, *zero, *store, *init; + struct hlsl_constant_value init_value; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; struct hlsl_type *ret_type; - struct hlsl_ir_load *load; struct hlsl_ir_var *var; struct hlsl_block block;
- if (params->args[0]->data_type->type != HLSL_CLASS_SCALAR - || params->args[1]->data_type->type != HLSL_CLASS_SCALAR - || params->args[2]->data_type->type != HLSL_CLASS_SCALAR) + if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR + || params->args[1]->data_type->class != HLSL_CLASS_SCALAR + || params->args[2]->data_type->class != HLSL_CLASS_SCALAR) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); return false; @@ -2726,37 +2855,35 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, return false; hlsl_init_simple_deref_from_var(&var_deref, var);
- if (!(init = hlsl_new_constant(ctx, ret_type, loc))) + init_value.u[0].f = 1.0f; + init_value.u[1].f = 0.0f; + init_value.u[2].f = 0.0f; + init_value.u[3].f = 1.0f; + if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) return false; - init->value[0].f = 1.0f; - init->value[1].f = 0.0f; - init->value[2].f = 0.0f; - init->value[3].f = 1.0f; - list_add_tail(params->instrs, &init->node.entry); + list_add_tail(params->instrs, &init->entry);
- if (!(store = hlsl_new_simple_store(ctx, var, &init->node))) + if (!(store = hlsl_new_simple_store(ctx, var, init))) return false; - list_add_tail(params->instrs, &store->node.entry); + list_add_tail(params->instrs, &store->entry);
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->node.entry); + list_add_tail(params->instrs, &zero->entry);
/* Diffuse component. */ - if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, &zero->node, loc))) + if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) return false;
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) return false; list_move_tail(params->instrs, &block.instrs);
/* Specular component. */ - if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, - n_h, &zero->node, loc))) + if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) return false;
- if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, - n_l, &zero->node, loc))) + if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_l, zero, loc))) return false;
if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) @@ -2765,20 +2892,69 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) return false;
- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, &zero->node, specular_pow))) + if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) return false;
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 2, &load->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) return false; list_move_tail(params->instrs, &block.instrs);
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &var_load->node.entry);
return true; }
+static bool intrinsic_log(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *arg, *coeff; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) + return false; + + /* ln(2): log(x) = log2(x) * ln(2). The constant has to be linked into the instruction list before the multiply uses it. */ + if (!(coeff = hlsl_new_float_constant(ctx, 0.69314718055f, loc))) + return false; + list_add_tail(params->instrs, &coeff->entry); + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); +} + +static bool intrinsic_log10(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *arg, *coeff; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) + return false; + + /* 1 / log2(10): log10(x) = log2(x) / log2(10). The constant has to be linked into the instruction list before the multiply uses it. */ + if (!(coeff = hlsl_new_float_constant(ctx, 0.301029996f, loc))) + return false; + list_add_tail(params->instrs, &coeff->entry); + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); +} + +static bool intrinsic_log2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); +} + static bool intrinsic_max(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2808,15 +2982,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, struct hlsl_ir_load *load; struct hlsl_ir_var *var;
- if (arg1->data_type->type == HLSL_CLASS_SCALAR || arg2->data_type->type == HLSL_CLASS_SCALAR) + if (arg1->data_type->class == HLSL_CLASS_SCALAR || arg2->data_type->class == HLSL_CLASS_SCALAR) return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1, arg2, loc);
- if (arg1->data_type->type == HLSL_CLASS_VECTOR) + if (arg1->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1); } - if (arg2->data_type->type == HLSL_CLASS_VECTOR) + if (arg2->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); @@ -2854,13 +3028,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, for (j = 0; j < matrix_type->dimy; ++j) { struct hlsl_ir_node *instr = NULL; - struct hlsl_ir_store *store; struct hlsl_block block;
for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k) { - struct hlsl_ir_load *value1, *value2; - struct hlsl_ir_node *mul; + struct hlsl_ir_node *value1, *value2, *mul;
if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) return false; @@ -2868,7 +3040,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) return false;
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &value1->node, &value2->node, loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) return false;
if (instr) @@ -2882,13 +3054,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } }
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) return false; list_move_tail(params->instrs, &block.instrs); } }
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; list_add_tail(params->instrs, &load->node.entry);
@@ -2901,7 +3073,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *dot, *rsq, *arg;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -2986,6 +3158,42 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); }
+static bool intrinsic_sign(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *lt, *neg, *op1, *op2, *zero, *arg = params->args[0]; + static const struct hlsl_constant_value zero_value; + + struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, + arg->data_type->dimx, arg->data_type->dimy); + + if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) + return false; + list_add_tail(params->instrs, &zero->entry); + + /* Check if 0 < arg, cast bool to int */ + + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) + return false; + + if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) + return false; + + /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ + + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) + return false; + + if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) + return false; + + if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) + return false; + + /* Adding these two together will make 1 when > 0, -1 when < 0, and 0 when neither */ + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, neg, op1, loc); +} + static bool intrinsic_sin(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3001,8 +3209,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res; - struct hlsl_ir_constant *one, *minus_two, *three; + struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res, *one, *minus_two, *three;
if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; @@ -3022,9 +3229,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) return false; - list_add_tail(params->instrs, &one->node.entry); + list_add_tail(params->instrs, &one->entry);
- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, &one->node, p_denom, loc))) + if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) return false;
if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) @@ -3035,16 +3242,16 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) return false; - list_add_tail(params->instrs, &minus_two->node.entry); + list_add_tail(params->instrs, &minus_two->entry);
if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) return false; - list_add_tail(params->instrs, &three->node.entry); + list_add_tail(params->instrs, &three->entry);
- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &minus_two->node, p, loc))) + if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) return false;
- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, &three->node, res, loc))) + if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, three, res, loc))) return false;
if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) @@ -3081,7 +3288,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, return false;
type = ge->data_type; - type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); }
@@ -3090,9 +3297,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * { struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - struct hlsl_ir_node *coords; + struct hlsl_ir_node *coords, *load;
if (params->args_count != 2 && params->args_count != 4) { @@ -3107,7 +3312,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * }
sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) { struct vkd3d_string_buffer *string; @@ -3118,24 +3323,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * name, ctx->builtin_types.sampler[dim]->name, string->buffer); hlsl_release_string_buffer(ctx, string); } - else - { - /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); - - load_params.resource = sampler_load->src; - }
if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) coords = params->args[1];
load_params.coords = coords; + load_params.resource = params->args[0]; load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + load_params.sampling_dim = dim;
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &load->entry); return true; }
@@ -3156,13 +3356,14 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_ir_node *arg = params->args[0]; struct hlsl_type *arg_type = arg->data_type; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; struct hlsl_type *mat_type; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load; struct hlsl_ir_var *var; unsigned int i, j;
- if (arg_type->type != HLSL_CLASS_SCALAR && arg_type->type != HLSL_CLASS_MATRIX) + if (arg_type->class != HLSL_CLASS_SCALAR && arg_type->class != HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -3174,7 +3375,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, return false; }
- if (arg_type->type == HLSL_CLASS_SCALAR) + if (arg_type->class == HLSL_CLASS_SCALAR) { list_add_tail(params->instrs, &arg->entry); return true; @@ -3190,21 +3391,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { for (j = 0; j < arg_type->dimy; ++j) { - struct hlsl_ir_store *store; struct hlsl_block block;
if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) return false;
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, &load->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) return false; list_move_tail(params->instrs, &block.instrs); } }
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &var_load->node.entry); + + return true; +} + +static bool intrinsic_trunc(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, arg, loc); +} + +static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; + struct hlsl_type *arg_type = arg->data_type; + + if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4)) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, arg_type))) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Wrong argument type '%s'.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + + return false; + } + + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return false; + + if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) + return false; + list_add_tail(params->instrs, &c->entry); + + if (arg_type->class == HLSL_CLASS_VECTOR) + { + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) + return false; + list_add_tail(params->instrs, &swizzle->entry); + + arg = swizzle; + } + + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) + return false; + + if (ctx->profile->major_version >= 4) + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc);
return true; } @@ -3220,22 +3475,31 @@ static const struct intrinsic_function intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. */ + {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, {"abs", 1, true, intrinsic_abs}, {"all", 1, true, intrinsic_all}, + {"any", 1, true, intrinsic_any}, + {"asfloat", 1, true, intrinsic_asfloat}, {"asuint", -1, true, intrinsic_asuint}, {"clamp", 3, true, intrinsic_clamp}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, + {"ddy", 1, true, intrinsic_ddy}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, {"floor", 1, true, intrinsic_floor}, + {"fmod", 2, true, intrinsic_fmod}, {"frac", 1, true, intrinsic_frac}, {"ldexp", 2, true, intrinsic_ldexp}, {"length", 1, true, intrinsic_length}, {"lerp", 3, true, intrinsic_lerp}, {"lit", 3, true, intrinsic_lit}, + {"log", 1, true, intrinsic_log}, + {"log10", 1, true, intrinsic_log10}, + {"log2", 1, true, intrinsic_log2}, {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, {"mul", 2, true, intrinsic_mul}, @@ -3245,6 +3509,7 @@ intrinsic_functions[] = {"round", 1, true, intrinsic_round}, {"rsqrt", 1, true, intrinsic_rsqrt}, {"saturate", 1, true, intrinsic_saturate}, + {"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, @@ -3252,6 +3517,7 @@ intrinsic_functions[] = {"tex2D", -1, false, intrinsic_tex2D}, {"tex3D", -1, false, intrinsic_tex3D}, {"transpose", 1, true, intrinsic_transpose}, + {"trunc", 1, true, intrinsic_trunc}, };
static int intrinsic_function_name_compare(const void *a, const void *b) @@ -3291,11 +3557,11 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
if (param->storage_modifiers & HLSL_STORAGE_IN) { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store;
if (!(store = hlsl_new_simple_store(ctx, param, arg))) goto fail; - list_add_tail(args->instrs, &store->node.entry); + list_add_tail(args->instrs, &store->entry); } }
@@ -3316,7 +3582,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Output argument to "%s" is const.", decl->func->name);
- if (!(load = hlsl_new_var_load(ctx, param, arg->loc))) + if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) goto fail; list_add_tail(args->instrs, &load->node.entry);
@@ -3329,7 +3595,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, { struct hlsl_ir_load *load;
- if (!(load = hlsl_new_var_load(ctx, decl->return_var, *loc))) + if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) goto fail; list_add_tail(args->instrs, &load->node.entry); } @@ -3360,7 +3626,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
for (i = 0; i < args->args_count; ++i) { - if (args->args[i]->data_type->type > HLSL_CLASS_LAST_NUMERIC) + if (args->args[i]->data_type->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string;
@@ -3397,20 +3663,20 @@ fail: }
static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, - struct parse_initializer *params, struct vkd3d_shader_location loc) + struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_load *load; struct hlsl_ir_var *var; unsigned int i, idx = 0;
- if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, &loc))) + if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL;
for (i = 0; i < params->args_count; ++i) { struct hlsl_ir_node *arg = params->args[i];
- if (arg->data_type->type == HLSL_CLASS_OBJECT) + if (arg->data_type->class == HLSL_CLASS_OBJECT) { struct vkd3d_string_buffer *string;
@@ -3455,320 +3721,526 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) } }
-static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -{ - const struct hlsl_type *object_type = object->data_type; - struct hlsl_ir_load *object_load; +static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct hlsl_type *object_type, + const char *method, const struct vkd3d_shader_location *loc) +{ + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, object_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Method '%s' is not defined on type '%s'.", method, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; +} + +static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; + struct hlsl_ir_node *load; + bool multisampled; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + + if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", + 1 + multisampled, 3 + multisampled, params->args_count); + return false; + } + if 
(multisampled) + { + if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) + return false; + } + + assert(offset_dim); + if (params->args_count > 1 + multisampled) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + if (params->args_count > 2 + multisampled) + { + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + } + + /* +1 for the mipmap level for non-multisampled textures */ + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) + return false; + + load_params.format = object_type->e.resource_format; + load_params.resource = object; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +} + +static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 
'Sample': expected from 2 to %u, but got %u.", + 4 + !!offset_dim, params->args_count); + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (offset_dim && params->args_count > 2) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + if (params->args_count > 2 + !!offset_dim) + hlsl_fixme(ctx, loc, "Sample() clamp parameter."); + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0]; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + + return true; +} + +static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = { 0 }; + 
const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (!strcmp(name, "SampleCmpLevelZero")) + load_params.type = HLSL_RESOURCE_SAMPLE_CMP_LZ; + else + load_params.type = HLSL_RESOURCE_SAMPLE_CMP; + + if (params->args_count < 3 || params->args_count > 5 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", + name, 5 + !!offset_dim, params->args_count); + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of %s(): expected 'SamplerComparisonState', but got '%s'.", + name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.cmp = params->args[2]; + + if (offset_dim && params->args_count > 3) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "%s() clamp parameter.", name); + if (params->args_count > 4 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled 
resource status argument."); + + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0]; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + + return true; +} + +static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {0}; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + unsigned int read_channel; + + if (object_type->sampler_dim != HLSL_SAMPLER_DIM_2D + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DARRAY + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (!strcmp(name, "GatherGreen")) + { + load_params.type = HLSL_RESOURCE_GATHER_GREEN; + read_channel = 1; + } + else if (!strcmp(name, "GatherBlue")) + { + load_params.type = HLSL_RESOURCE_GATHER_BLUE; + read_channel = 2; + } + else if (!strcmp(name, "GatherAlpha")) + { + load_params.type = HLSL_RESOURCE_GATHER_ALPHA; + read_channel = 3; + } + else + { + load_params.type = HLSL_RESOURCE_GATHER_RED; + read_channel = 0; + } + + if (!strcmp(name, "Gather") || !offset_dim) + { + if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", + name, 3 + !!offset_dim, params->args_count); + return false; + } + } + else if 
(params->args_count < 2 || params->args_count == 5 || params->args_count > 7) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", + name, params->args_count); + return false; + } + + if (params->args_count == 3 + !!offset_dim || params->args_count == 7) + hlsl_fixme(ctx, loc, "Tiled resource status argument.");
- if (object_type->type != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE - || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + if (params->args_count == 6 || params->args_count == 7) + { + hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); + } + else if (offset_dim && params->args_count > 2) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string;
- if ((string = hlsl_type_to_string(ctx, object_type))) + if ((string = hlsl_type_to_string(ctx, sampler_type))) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Type '%s' does not have methods.", string->buffer); + "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); hlsl_release_string_buffer(ctx, string); return false; }
- /* Only HLSL_IR_LOAD can return an object. */ - object_load = hlsl_ir_load(object); - - if (!strcmp(name, "Load") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE - && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + if (read_channel >= object_type->e.resource_format->dimx) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; - struct hlsl_ir_resource_load *load; - bool multisampled; - - multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Method %s() requires at least %u channels.", name, read_channel + 1); + return false; + }
- if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", - 1 + multisampled, 3 + multisampled, params->args_count); - return false; - } - if (multisampled) - { - hlsl_fixme(ctx, loc, "Load() sampling index parameter."); - } + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false;
- assert(offset_dim); - if (params->args_count > 1 + multisampled) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } - if (params->args_count > 2 + multisampled) - { - hlsl_fixme(ctx, loc, "Tiled resource status argument."); - } + load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); + load_params.resource = object; + load_params.sampler = params->args[0];
- /* +1 for the mipmap level */ - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) - return false; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +}
- load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; +static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + struct hlsl_resource_load_params load_params = { 0 }; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load;
- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); } - else if (!strcmp(name, "Sample") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + + if (!strcmp(name, "SampleLevel")) + load_params.type = HLSL_RESOURCE_SAMPLE_LOD; + else + load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; + + if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", + name, 4 + !!offset_dim, params->args_count); + return false; + }
- if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", - 4 + !!offset_dim, params->args_count); - return false; - } + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string;
- sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + }
- if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1];
- /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); + if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.lod = params->args[2];
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + if (offset_dim && params->args_count > 3) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; + }
- if (offset_dim && params->args_count > 2) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument.");
- if (params->args_count > 2 + !!offset_dim) - hlsl_fixme(ctx, loc, "Sample() clamp parameter."); - if (params->args_count > 3 + !!offset_dim) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0];
- load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +}
- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); +static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + struct hlsl_resource_load_params load_params = { 0 }; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load;
- return true; - } - else if ((!strcmp(name, "Gather") || !strcmp(name, "GatherRed") || !strcmp(name, "GatherBlue") - || !strcmp(name, "GatherGreen") || !strcmp(name, "GatherAlpha")) - && (object_type->sampler_dim == HLSL_SAMPLER_DIM_2D - || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DARRAY - || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE - || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY)) + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {0}; - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - unsigned int read_channel; - - if (!strcmp(name, "GatherGreen")) - { - load_params.type = HLSL_RESOURCE_GATHER_GREEN; - read_channel = 1; - } - else if (!strcmp(name, "GatherBlue")) - { - load_params.type = HLSL_RESOURCE_GATHER_BLUE; - read_channel = 2; - } - else if (!strcmp(name, "GatherAlpha")) - { - load_params.type = HLSL_RESOURCE_GATHER_ALPHA; - read_channel = 3; - } - else - { - load_params.type = HLSL_RESOURCE_GATHER_RED; - read_channel = 0; - } - - if (!strcmp(name, "Gather") || !offset_dim) - { - if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", - name, 3 + !!offset_dim, params->args_count); - return false; - } - } - else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", - name, params->args_count); - return false; - } - - if 
(params->args_count == 3 + !!offset_dim || params->args_count == 7) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + return raise_invalid_method_object_type(ctx, object_type, name, loc); + }
- if (params->args_count == 6 || params->args_count == 7) - { - hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); - } - else if (offset_dim && params->args_count > 2) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } + load_params.type = HLSL_RESOURCE_SAMPLE_GRAD;
- sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + if (params->args_count < 4 || params->args_count > 5 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 4 to %u, but got %u.", + name, 5 + !!offset_dim, params->args_count); + return false; + }
- if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string;
- if (read_channel >= object_type->e.resource_format->dimx) - { + if ((string = hlsl_type_to_string(ctx, sampler_type))) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Method %s() requires at least %u channels.", name, read_channel + 1); - return false; - } + "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + }
- /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1];
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; + if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddx = params->args[2];
- load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; + if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddy = params->args[3];
- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; - } - else if (!strcmp(name, "SampleLevel") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + if (offset_dim && params->args_count > 4) { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE_LOD}; - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - - if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'SampleLevel': expected from 3 to %u, but got %u.", - 4 + !!offset_dim, params->args_count); + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; - } - - sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + }
- if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of SampleLevel(): expected 'sampler', but got '%s'.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + if (params->args_count > 4 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument.");
- /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0];
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.coords = params->args[1]; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +}
- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - load_params.lod = params->args[2]; +static const struct method_function +{ + const char *name; + bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); +} +object_methods[] = +{ + { "Gather", add_gather_method_call }, + { "GatherAlpha", add_gather_method_call }, + { "GatherBlue", add_gather_method_call }, + { "GatherGreen", add_gather_method_call }, + { "GatherRed", add_gather_method_call }, + + { "Load", add_load_method_call }, + + { "Sample", add_sample_method_call }, + { "SampleBias", add_sample_lod_method_call }, + { "SampleCmp", add_sample_cmp_method_call }, + { "SampleCmpLevelZero", add_sample_cmp_method_call }, + { "SampleGrad", add_sample_grad_method_call }, + { "SampleLevel", add_sample_lod_method_call }, +};
- if (offset_dim && params->args_count > 3) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } +static int object_method_function_name_compare(const void *a, const void *b) +{ + const struct method_function *func = b;
- if (params->args_count > 3 + !!offset_dim) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + return strcmp(a, func->name); +}
- load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; +static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const struct method_function *method;
- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; - } - else + if (object_type->class != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, object_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, - "Method '%s' is not defined on type '%s'.", name, string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Type '%s' does not have methods.", string->buffer); hlsl_release_string_buffer(ctx, string); return false; } + + if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), + sizeof(*method), object_method_function_name_compare))) + { + return method->handler(ctx, instrs, object, name, params, loc); + } + else + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } }
static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type *format, const struct vkd3d_shader_location *loc) { - if (format->type > HLSL_CLASS_VECTOR) + if (format->class > HLSL_CLASS_VECTOR) { struct vkd3d_string_buffer *string;
@@ -3846,6 +4318,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_NAMESPACE %token KW_NOINTERPOLATION %token KW_OUT +%token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER %token KW_PRECISE @@ -3854,6 +4327,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_RETURN %token KW_REGISTER %token KW_ROW_MAJOR +%token KW_RWBUFFER +%token KW_RWSTRUCTUREDBUFFER %token KW_RWTEXTURE1D %token KW_RWTEXTURE2D %token KW_RWTEXTURE3D @@ -3933,6 +4408,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <list> conditional_expr %type <list> declaration %type <list> declaration_statement +%type <list> discard_statement %type <list> equality_expr %type <list> expr %type <list> expr_optional @@ -3968,6 +4444,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <attr> attribute
%type <attr_list> attribute_list +%type <attr_list> attribute_list_optional
%type <boolval> boolean
@@ -3999,6 +4476,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <parameters> parameters
%type <reg_reservation> register_opt +%type <reg_reservation> packoffset_opt
%type <sampler_dim> texture_type texture_ms_type uav_type
@@ -4037,7 +4515,7 @@ buffer_declaration: if ($3.semantic.name) hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers.");
- if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, @2))) + if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) YYABORT; }
@@ -4261,6 +4739,14 @@ attribute_list: $$.attrs[$$.count++] = $2; }
+attribute_list_optional: + %empty + { + $$.count = 0; + $$.attrs = NULL; + } + | attribute_list + func_declaration: func_prototype compound_statement { @@ -4349,8 +4835,11 @@ func_prototype_no_attrs: "Semantics are not allowed on void functions."); }
- if ($7.reg_reservation.type) + if ($7.reg_reservation.reg_type) FIXME("Unexpected register reservation for a function.\n"); + if ($7.reg_reservation.offset_type) + hlsl_error(ctx, &@5, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on functions.");
if (($$.decl = get_func_decl(&ctx->functions, $3, &$5))) { @@ -4476,17 +4965,24 @@ var_identifier: colon_attribute: %empty { - $$.semantic.name = NULL; - $$.reg_reservation.type = 0; + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; } | semantic { $$.semantic = $1; - $$.reg_reservation.type = 0; + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; } | register_opt { - $$.semantic.name = NULL; + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation = $1; + } + | packoffset_opt + { + $$.semantic = (struct hlsl_semantic){0}; $$.reg_reservation = $1; }
@@ -4499,6 +4995,9 @@ semantic: ; $$.name = $2; $$.index = atoi(p); + $$.reported_missing = false; + $$.reported_duplicated_output_next_index = 0; + $$.reported_duplicated_input_incompatible_next_index = 0; *p = 0; }
@@ -4518,6 +5017,21 @@ register_opt: vkd3d_free($6); }
+packoffset_opt: + ':' KW_PACKOFFSET '(' any_identifier ')' + { + $$ = parse_packoffset(ctx, $4, NULL, &@$); + + vkd3d_free($4); + } + | ':' KW_PACKOFFSET '(' any_identifier '.' any_identifier ')' + { + $$ = parse_packoffset(ctx, $4, $6, &@$); + + vkd3d_free($4); + vkd3d_free($6); + } + parameters: scope_start { @@ -4536,7 +5050,7 @@ param_list: parameter { memset(&$$, 0, sizeof($$)); - if (!add_func_parameter(ctx, &$$, &$1, @1)) + if (!add_func_parameter(ctx, &$$, &$1, &@1)) { ERR("Error adding function parameter %s.\n", $1.name); YYABORT; @@ -4545,7 +5059,7 @@ param_list: | param_list ',' parameter { $$ = $1; - if (!add_func_parameter(ctx, &$$, &$3, @3)) + if (!add_func_parameter(ctx, &$$, &$3, &@3)) { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "Parameter "%s" is already declared.", $3.name); @@ -4624,7 +5138,15 @@ texture_ms_type: }
uav_type: - KW_RWTEXTURE1D + KW_RWBUFFER + { + $$ = HLSL_SAMPLER_DIM_BUFFER; + } + | KW_RWSTRUCTUREDBUFFER + { + $$ = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER; + } + | KW_RWTEXTURE1D { $$ = HLSL_SAMPLER_DIM_1D; } @@ -4640,7 +5162,7 @@ uav_type: type_no_void: KW_VECTOR '<' type ',' C_INTEGER '>' { - if ($3->type != HLSL_CLASS_SCALAR) + if ($3->class != HLSL_CLASS_SCALAR) { struct vkd3d_string_buffer *string;
@@ -4667,7 +5189,7 @@ type_no_void: } | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' { - if ($3->type != HLSL_CLASS_SCALAR) + if ($3->class != HLSL_CLASS_SCALAR) { struct vkd3d_string_buffer *string;
@@ -4702,6 +5224,10 @@ type_no_void: { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; } + | KW_SAMPLERCOMPARISONSTATE + { + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_COMPARISON]; + } | KW_SAMPLER1D { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_1D]; @@ -4716,7 +5242,7 @@ type_no_void: } | KW_SAMPLERCUBE { - $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_3D]; + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_CUBE]; } | KW_TEXTURE { @@ -4740,23 +5266,58 @@ type_no_void: } | texture_ms_type '<' type ',' shift_expr '>' { - unsigned int sample_count = evaluate_static_expression(node_from_list($5)); - destroy_instr_list($5); + unsigned int sample_count; + struct hlsl_block block; + + hlsl_block_init(&block); + list_move_tail(&block.instrs, $5); + + sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); + + hlsl_block_cleanup(&block); + + vkd3d_free($5);
$$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); } | uav_type '<' type '>' { - if ($3->type > HLSL_CLASS_VECTOR) - { - struct vkd3d_string_buffer *string; + struct vkd3d_string_buffer *string = hlsl_type_to_string(ctx, $3);
- string = hlsl_type_to_string(ctx, $3); + if (!type_contains_only_numerics($3)) + { if (string) hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "UAV data type %s is not scalar or vector.", string->buffer); - hlsl_release_string_buffer(ctx, string); + "UAV type %s is not numeric.", string->buffer); + } + + switch ($1) + { + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_2D: + case HLSL_SAMPLER_DIM_3D: + if ($3->class == HLSL_CLASS_ARRAY) + { + if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "This type of UAV does not support array type."); + } + else if (hlsl_type_component_count($3) > 4) + { + if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "UAV data type %s size exceeds maximum size.", string->buffer); + } + break; + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + break; + default: + vkd3d_unreachable(); } + + hlsl_release_string_buffer(ctx, string); + $$ = hlsl_new_uav_type(ctx, $1, $3); } | TYPE_IDENTIFIER @@ -4779,7 +5340,7 @@ type_no_void: | KW_STRUCT TYPE_IDENTIFIER { $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); - if ($$->type != HLSL_CLASS_STRUCT) + if ($$->class != HLSL_CLASS_STRUCT) hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_REDEFINED, ""%s" redefined as a structure.", $2); vkd3d_free($2); } @@ -4934,10 +5495,17 @@ arrays: } | '[' expr ']' arrays { - unsigned int size = evaluate_static_expression(node_from_list($2)); + struct hlsl_block block; uint32_t *new_array; + unsigned int size;
- destroy_instr_list($2); + hlsl_clone_block(ctx, &block, &ctx->static_initializers); + list_move_tail(&block.instrs, $2); + + size = evaluate_static_expression_as_uint(ctx, &block, &@2); + + hlsl_block_cleanup(&block); + vkd3d_free($2);
$$ = $4;
@@ -4988,59 +5556,59 @@ var_modifiers: } | KW_EXTERN var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, &@1); } | KW_NOINTERPOLATION var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, &@1); } | KW_PRECISE var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); } | KW_SHARED var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); } | KW_GROUPSHARED var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, &@1); } | KW_STATIC var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, &@1); } | KW_UNIFORM var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, &@1); } | KW_VOLATILE var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, &@1); } | KW_CONST var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, &@1); } | KW_ROW_MAJOR var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, &@1); } | KW_COLUMN_MAJOR var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, &@1); } | KW_IN var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, &@1); } | KW_OUT var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, &@1); } | KW_INOUT var_modifiers { - $$ 
= add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); }
@@ -5145,6 +5713,7 @@ statement: declaration_statement | expr_statement | compound_statement + | discard_statement | jump_statement | selection_statement | loop_statement @@ -5152,7 +5721,7 @@ statement: jump_statement: KW_RETURN expr ';' { - if (!add_return(ctx, $2, node_from_list($2), @1)) + if (!add_return(ctx, $2, node_from_list($2), &@1)) YYABORT; $$ = $2; } @@ -5160,65 +5729,81 @@ jump_statement: { if (!($$ = make_empty_list(ctx))) YYABORT; - if (!add_return(ctx, $$, NULL, @1)) + if (!add_return(ctx, $$, NULL, &@1)) + YYABORT; + } + +discard_statement: + KW_DISCARD ';' + { + struct hlsl_ir_node *discard; + + if (!($$ = make_empty_list(ctx))) YYABORT; + if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) + YYABORT; /* Not "return false": that would leave yyparse() with 0 (success) and skip its cleanup. */ + list_add_tail($$, &discard->entry); }
selection_statement: KW_IF '(' expr ')' if_body { struct hlsl_ir_node *condition = node_from_list($3); - struct hlsl_ir_if *instr; - - if (!(instr = hlsl_new_if(ctx, condition, @1))) + struct hlsl_block then_block, else_block; + struct hlsl_ir_node *instr; + + hlsl_block_init(&then_block); + list_move_tail(&then_block.instrs, $5.then_block); + hlsl_block_init(&else_block); + if ($5.else_block) + list_move_tail(&else_block.instrs, $5.else_block); + vkd3d_free($5.then_block); + vkd3d_free($5.else_block); + + if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) YYABORT; - list_move_tail(&instr->then_instrs.instrs, $5.then_instrs); - if ($5.else_instrs) - list_move_tail(&instr->else_instrs.instrs, $5.else_instrs); - vkd3d_free($5.then_instrs); - vkd3d_free($5.else_instrs); if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, condition->data_type))) - hlsl_error(ctx, &instr->node.loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "if condition type %s is not scalar.", string->buffer); hlsl_release_string_buffer(ctx, string); } $$ = $3; - list_add_tail($$, &instr->node.entry); + list_add_tail($$, &instr->entry); }
if_body: statement { - $$.then_instrs = $1; - $$.else_instrs = NULL; + $$.then_block = $1; + $$.else_block = NULL; } | statement KW_ELSE statement { - $$.then_instrs = $1; - $$.else_instrs = $3; + $$.then_block = $1; + $$.else_block = $3; }
loop_statement: - KW_WHILE '(' expr ')' statement + attribute_list_optional KW_WHILE '(' expr ')' statement { - $$ = create_loop(ctx, LOOP_WHILE, NULL, $3, NULL, $5, @1); + $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $4, NULL, $6, &@2); } - | KW_DO statement KW_WHILE '(' expr ')' ';' + | attribute_list_optional KW_DO statement KW_WHILE '(' expr ')' ';' { - $$ = create_loop(ctx, LOOP_DO_WHILE, NULL, $5, NULL, $2, @1); + $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $6, NULL, $3, &@2); } - | KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement + | attribute_list_optional KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); + $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); hlsl_pop_scope(ctx); } - | KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement + | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); + $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); hlsl_pop_scope(ctx); }
@@ -5250,31 +5835,31 @@ func_arguments: primary_expr: C_FLOAT { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) YYABORT; } | C_INTEGER { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) YYABORT; } | boolean { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) { - hlsl_free_instr(&c->node); + hlsl_free_instr(c); YYABORT; } } @@ -5288,7 +5873,7 @@ primary_expr: hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable "%s" is not defined.", $1); YYABORT; } - if (!(load = hlsl_new_var_load(ctx, var, @1))) + if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; if (!($$ = make_list(ctx, &load->node))) YYABORT; @@ -5316,7 +5901,7 @@ primary_expr: if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) YYABORT; - if (!(load = hlsl_new_var_load(ctx, var, @1))) + if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; if (!($$ = make_list(ctx, &load->node))) YYABORT; @@ -5332,7 +5917,7 @@ postfix_expr: primary_expr | postfix_expr OP_INC { - if (!add_increment(ctx, $1, false, true, @2)) + if (!add_increment(ctx, $1, false, true, &@2)) { destroy_instr_list($1); YYABORT; @@ -5341,7 +5926,7 @@ postfix_expr: } | postfix_expr OP_DEC { - if (!add_increment(ctx, $1, true, true, @2)) + if (!add_increment(ctx, $1, true, true, &@2)) { destroy_instr_list($1); YYABORT; @@ -5352,7 +5937,7 @@ postfix_expr: { struct hlsl_ir_node *node = node_from_list($1);
- if (node->data_type->type == HLSL_CLASS_STRUCT) + if (node->data_type->class == HLSL_CLASS_STRUCT) { struct hlsl_type *type = node->data_type; const struct hlsl_struct_field *field; @@ -5365,20 +5950,20 @@ postfix_expr: }
field_idx = field - type->e.record.fields; - if (!add_record_load(ctx, $1, node, field_idx, @2)) + if (!add_record_access(ctx, $1, node, field_idx, &@2)) YYABORT; $$ = $1; } - else if (node->data_type->type <= HLSL_CLASS_LAST_NUMERIC) + else if (node->data_type->class <= HLSL_CLASS_LAST_NUMERIC) { - struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_node *swizzle;
if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle "%s".", $3); YYABORT; } - list_add_tail($1, &swizzle->node.entry); + list_add_tail($1, &swizzle->entry); $$ = $1; } else @@ -5391,10 +5976,10 @@ postfix_expr: { struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3);
- list_move_tail($1, $3); + list_move_head($1, $3); vkd3d_free($3);
- if (!add_array_load(ctx, $1, array, index, &@2)) + if (!add_array_access(ctx, $1, array, index, &@2)) { destroy_instr_list($1); YYABORT; @@ -5412,7 +5997,7 @@ postfix_expr: free_parse_initializer(&$4); YYABORT; } - if ($2->type > HLSL_CLASS_LAST_NUMERIC) + if ($2->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string;
@@ -5432,7 +6017,7 @@ postfix_expr: YYABORT; }
- if (!($$ = add_constructor(ctx, $2, &$4, @2))) + if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { free_parse_initializer(&$4); YYABORT; @@ -5459,7 +6044,7 @@ unary_expr: postfix_expr | OP_INC unary_expr { - if (!add_increment(ctx, $2, false, false, @1)) + if (!add_increment(ctx, $2, false, false, &@1)) { destroy_instr_list($2); YYABORT; @@ -5468,7 +6053,7 @@ unary_expr: } | OP_DEC unary_expr { - if (!add_increment(ctx, $2, true, false, @1)) + if (!add_increment(ctx, $2, true, false, &@1)) { destroy_instr_list($2); YYABORT; @@ -5545,31 +6130,31 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); } | mul_expr '/' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); } | mul_expr '%' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); }
add_expr: mul_expr | add_expr '+' mul_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } | add_expr '-' mul_expr { struct hlsl_ir_node *neg;
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), @2))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) YYABORT; list_add_tail($3, &neg->entry); - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); }
shift_expr: @@ -5587,30 +6172,30 @@ relational_expr: shift_expr | relational_expr '<' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); } | relational_expr '>' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, @2); + $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); } | relational_expr OP_LE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); } | relational_expr OP_GE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); }
equality_expr: relational_expr | equality_expr OP_EQ relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); } | equality_expr OP_NE relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); }
bitand_expr: @@ -5652,7 +6237,26 @@ conditional_expr: logicor_expr | logicor_expr '?' expr ':' assignment_expr { - hlsl_fixme(ctx, &@$, "Ternary operator."); + struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); + struct hlsl_type *common_type; + + list_move_tail($1, $3); + list_move_tail($1, $5); + vkd3d_free($3); + vkd3d_free($5); + + if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) + YYABORT; + + if (!(first = add_implicit_conversion(ctx, $1, first, common_type, &@3))) + YYABORT; + + if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) + YYABORT; + + if (!hlsl_add_conditional(ctx, $1, cond, first, second)) + YYABORT; + $$ = $1; }
assignment_expr: diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index ab59875738c..765b1907426 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -27,11 +27,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str enum hlsl_regset regset, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *idx_offset = NULL; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
- list_init(&block->instrs); + hlsl_block_init(block);
- switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: idx_offset = idx; @@ -41,11 +41,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { if (!(c = hlsl_new_uint_constant(ctx, 4, loc))) return NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c);
- if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) + if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset);
break; } @@ -56,25 +56,25 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str
if (!(c = hlsl_new_uint_constant(ctx, size, loc))) return NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c);
- if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) + if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset);
break; }
case HLSL_CLASS_STRUCT: { - unsigned int field_idx = hlsl_ir_constant(idx)->value[0].u; + unsigned int field_idx = hlsl_ir_constant(idx)->value.u[0].u; struct hlsl_struct_field *field = &type->e.record.fields[field_idx];
if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset[regset], loc))) return NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c);
- idx_offset = &c->node; + idx_offset = c;
break; } @@ -87,7 +87,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, offset, idx_offset))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset); }
return idx_offset; @@ -101,7 +101,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st struct hlsl_type *type; unsigned int i;
- list_init(&block->instrs); + hlsl_block_init(block);
assert(deref->var); type = deref->var->data_type; @@ -114,7 +114,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st deref->offset_regset, loc))) return NULL;
- list_move_tail(&block->instrs, &idx_block.instrs); + hlsl_block_add_block(block, &idx_block);
type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); } @@ -123,15 +123,14 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st }
/* TODO: remove when no longer needed, only used for transform_deref_paths_into_offsets() */ -static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, +static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { const struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block;
- if (!deref->var) - return; + assert(deref->var);
/* register offsets shouldn't be used before this point is reached. */ assert(!deref->offset.node); @@ -140,48 +139,22 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der
/* Instructions that directly refer to structs or arrays (instead of single-register components) * are removed later by dce. So it is not a problem to just cleanup their derefs. */ - if (type->type == HLSL_CLASS_STRUCT || type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY) { hlsl_cleanup_deref(deref); - return; + return true; }
deref->offset_regset = hlsl_type_get_regset(type);
if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) - return; + return false; list_move_before(&instr->entry, &block.instrs);
hlsl_cleanup_deref(deref); hlsl_src_from_node(&deref->offset, offset); -} - -/* TODO: remove when no longer needed. */ -static bool transform_deref_paths_into_offsets(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -{ - switch(instr->type) - { - case HLSL_IR_LOAD: - replace_deref_path_with_offset(ctx, &hlsl_ir_load(instr)->src, instr); - return true; - - case HLSL_IR_STORE: - replace_deref_path_with_offset(ctx, &hlsl_ir_store(instr)->lhs, instr); - return true;
- case HLSL_IR_RESOURCE_LOAD: - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->resource, instr); - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); - return true; - - case HLSL_IR_RESOURCE_STORE: - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_store(instr)->resource, instr); - return true; - - default: - return false; - } - return false; + return true; }
/* Split uniforms into two variables representing the constant and temp @@ -191,14 +164,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru { struct vkd3d_string_buffer *name; struct hlsl_ir_var *uniform; - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; struct hlsl_ir_load *load;
/* Use the synthetic name for the temp, rather than the uniform, so that we * can write the uniform name into the shader reflection data. */
if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type, - temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) + &temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) return; list_add_before(&temp->scope_entry, &uniform->scope_entry); list_add_tail(&ctx->extern_vars, &uniform->extern_entry); @@ -212,17 +185,53 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru temp->name = hlsl_strdup(ctx, name->buffer); hlsl_release_string_buffer(ctx, name);
- if (!(load = hlsl_new_var_load(ctx, uniform, temp->loc))) + if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) return; list_add_head(instrs, &load->node.entry);
if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) return; - list_add_after(&load->node.entry, &store->node.entry); + list_add_after(&load->node.entry, &store->entry); +} + +static void validate_field_semantic(struct hlsl_ctx *ctx, struct hlsl_struct_field *field) +{ + if (!field->semantic.name && hlsl_get_multiarray_element_type(field->type)->class <= HLSL_CLASS_LAST_NUMERIC + && !field->semantic.reported_missing) + { + hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Field '%s' is missing a semantic.", field->name); + field->semantic.reported_missing = true; + } +} + +static enum hlsl_base_type base_type_get_semantic_equivalent(enum hlsl_base_type base) +{ + if (base == HLSL_TYPE_BOOL) + return HLSL_TYPE_UINT; + if (base == HLSL_TYPE_INT) + return HLSL_TYPE_UINT; + if (base == HLSL_TYPE_HALF) + return HLSL_TYPE_FLOAT; + return base; +} + +static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hlsl_type *type1, + const struct hlsl_type *type2) +{ + if (ctx->profile->major_version < 4) + return true; + + if (type1->dimx != type2->dimx) + return false; + + return base_type_get_semantic_equivalent(type1->base_type) + == base_type_get_semantic_equivalent(type2->base_type); }
static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct hlsl_type *type, unsigned int modifiers, const struct hlsl_semantic *semantic, bool output) + struct hlsl_type *type, unsigned int modifiers, struct hlsl_semantic *semantic, + uint32_t index, bool output, const struct vkd3d_shader_location *loc) { struct hlsl_semantic new_semantic; struct vkd3d_string_buffer *name; @@ -230,15 +239,50 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
if (!(name = hlsl_get_string_buffer(ctx))) return NULL; - vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, semantic->index); + vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, index); + + LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!ascii_strcasecmp(ext_var->name, name->buffer)) + { + if (output) + { + if (index >= semantic->reported_duplicated_output_next_index) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Output semantic "%s%u" is used multiple times.", semantic->name, index); + hlsl_note(ctx, &ext_var->loc, HLSL_LEVEL_ERROR, + "First use of "%s%u" is here.", semantic->name, index); + semantic->reported_duplicated_output_next_index = index + 1; + } + } + else + { + if (index >= semantic->reported_duplicated_input_incompatible_next_index + && !types_are_semantic_equivalent(ctx, ext_var->data_type, type)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Input semantic "%s%u" is used multiple times with incompatible types.", + semantic->name, index); + hlsl_note(ctx, &ext_var->loc, HLSL_LEVEL_ERROR, + "First declaration of "%s%u" is here.", semantic->name, index); + semantic->reported_duplicated_input_incompatible_next_index = index + 1; + } + } + + hlsl_release_string_buffer(ctx, name); + return ext_var; + } + } + if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) { hlsl_release_string_buffer(ctx, name); return NULL; } - new_semantic.index = semantic->index; - if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), - type, var->loc, &new_semantic, modifiers, NULL))) + new_semantic.index = index; + if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, loc, &new_semantic, + modifiers, NULL))) { hlsl_release_string_buffer(ctx, name); hlsl_cleanup_semantic(&new_semantic); @@ -257,80 +301,116 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx 
*ctx, struct hlsl_ir }
static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, - unsigned int modifiers, const struct hlsl_semantic *semantic) + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { - struct hlsl_type *type = lhs->node.data_type, *vector_type; + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; + struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_ir_var *var = lhs->src.var; + struct hlsl_ir_node *c; unsigned int i;
- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + if (type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + if (!(string = hlsl_type_to_string(ctx, type))) + return; + hlsl_fixme(ctx, &var->loc, "Input semantics for type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + if (!semantic->name) + return; + + vector_type_src = hlsl_get_vector_type(ctx, type->base_type, + (ctx->profile->major_version < 4) ? 4 : hlsl_type_minor_size(type)); + vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type));
for (i = 0; i < hlsl_type_major_size(type); ++i) { - struct hlsl_semantic semantic_copy = *semantic; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *store, *cast; struct hlsl_ir_var *input; struct hlsl_ir_load *load;
- semantic_copy.index = semantic->index + i; - - if (!(input = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, false))) + if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, + semantic_index + i, false, loc))) return;
- if (!(load = hlsl_new_var_load(ctx, input, var->loc))) + if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) return; list_add_after(&lhs->node.entry, &load->node.entry);
- if (type->type == HLSL_CLASS_MATRIX) + if (!(cast = hlsl_new_cast(ctx, &load->node, vector_type_dst, &var->loc))) + return; + list_add_after(&load->node.entry, &cast->entry); + + if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_after(&load->node.entry, &c->node.entry); + list_add_after(&cast->entry, &c->entry);
- if (!(store = hlsl_new_store_index(ctx, &lhs->src, &c->node, &load->node, 0, &var->loc))) + if (!(store = hlsl_new_store_index(ctx, &lhs->src, c, cast, 0, &var->loc))) return; - list_add_after(&c->node.entry, &store->node.entry); + list_add_after(&c->entry, &store->entry); } else { assert(i == 0);
- if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, &load->node, 0, &var->loc))) + if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) return; - list_add_after(&load->node.entry, &store->node.entry); + list_add_after(&cast->entry, &store->entry); } } }
-static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs) +static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { + struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_type *type = lhs->node.data_type; struct hlsl_ir_var *var = lhs->src.var; - size_t i; + struct hlsl_ir_node *c; + unsigned int i;
- for (i = 0; i < type->e.record.field_count; ++i) + if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - struct hlsl_ir_load *field_load; - struct hlsl_ir_constant *c; + struct hlsl_ir_load *element_load; + struct hlsl_struct_field *field; + uint32_t elem_semantic_index;
- if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - list_add_after(&lhs->node.entry, &c->node.entry); + for (i = 0; i < hlsl_type_element_count(type); ++i) + { + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + } + else + { + field = &type->e.record.fields[i]; + if (hlsl_type_is_resource(field->type)) + continue; + validate_field_semantic(ctx, field); + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; + }
- /* This redundant load is expected to be deleted later by DCE. */ - if (!(field_load = hlsl_new_load_index(ctx, &lhs->src, &c->node, &var->loc))) - return; - list_add_after(&c->node.entry, &field_load->node.entry); + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; + list_add_after(&lhs->node.entry, &c->entry);
- if (field->type->type == HLSL_CLASS_STRUCT) - prepend_input_struct_copy(ctx, instrs, field_load); - else if (field->semantic.name) - prepend_input_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); - else - hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Field '%s' is missing a semantic.", field->name); + /* This redundant load is expected to be deleted later by DCE. */ + if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc))) + return; + list_add_after(&c->entry, &element_load->node.entry); + + prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { + prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); } }
@@ -341,45 +421,51 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st struct hlsl_ir_load *load;
/* This redundant load is expected to be deleted later by DCE. */ - if (!(load = hlsl_new_var_load(ctx, var, var->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; list_add_head(instrs, &load->node.entry);
- if (var->data_type->type == HLSL_CLASS_STRUCT) - prepend_input_struct_copy(ctx, instrs, load); - else if (var->semantic.name) - prepend_input_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); + prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); }
static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, - unsigned int modifiers, const struct hlsl_semantic *semantic) + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = rhs->node.data_type, *vector_type; + struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_ir_var *var = rhs->src.var; + struct hlsl_ir_node *c; unsigned int i;
+ if (type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + if (!(string = hlsl_type_to_string(ctx, type))) + return; + hlsl_fixme(ctx, &var->loc, "Output semantics for type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + if (!semantic->name) + return; + vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type));
for (i = 0; i < hlsl_type_major_size(type); ++i) { - struct hlsl_semantic semantic_copy = *semantic; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *store; struct hlsl_ir_var *output; struct hlsl_ir_load *load;
- semantic_copy.index = semantic->index + i; - - if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, true))) + if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) return;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_tail(instrs, &c->node.entry); + list_add_tail(instrs, &c->entry);
- if (!(load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) + if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; list_add_tail(instrs, &load->node.entry); } @@ -394,38 +480,57 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) return; - list_add_tail(instrs, &store->node.entry); + list_add_tail(instrs, &store->entry); } }
-static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs) +static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { + struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_type *type = rhs->node.data_type; struct hlsl_ir_var *var = rhs->src.var; - size_t i; + struct hlsl_ir_node *c; + unsigned int i;
- for (i = 0; i < type->e.record.field_count; ++i) + if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - struct hlsl_ir_load *field_load; - struct hlsl_ir_constant *c; + struct hlsl_ir_load *element_load; + struct hlsl_struct_field *field; + uint32_t elem_semantic_index;
- if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - list_add_tail(instrs, &c->node.entry); + for (i = 0; i < hlsl_type_element_count(type); ++i) + { + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + } + else + { + field = &type->e.record.fields[i]; + if (hlsl_type_is_resource(field->type)) + continue; + validate_field_semantic(ctx, field); + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; + }
- /* This redundant load is expected to be deleted later by DCE. */ - if (!(field_load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) - return; - list_add_tail(instrs, &field_load->node.entry); + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; + list_add_tail(instrs, &c->entry);
- if (field->type->type == HLSL_CLASS_STRUCT) - append_output_struct_copy(ctx, instrs, field_load); - else if (field->semantic.name) - append_output_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); - else - hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Field '%s' is missing a semantic.", field->name); + if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) + return; + list_add_tail(instrs, &element_load->node.entry); + + append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { + append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); } }
@@ -437,17 +542,14 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st struct hlsl_ir_load *load;
/* This redundant load is expected to be deleted later by DCE. */ - if (!(load = hlsl_new_var_load(ctx, var, var->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; list_add_tail(instrs, &load->node.entry);
- if (var->data_type->type == HLSL_CLASS_STRUCT) - append_output_struct_copy(ctx, instrs, load); - else if (var->semantic.name) - append_output_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); + append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); }
-static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), +bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context) { struct hlsl_ir_node *instr, *next; @@ -459,11 +561,11 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- progress |= transform_ir(ctx, func, &iff->then_instrs, context); - progress |= transform_ir(ctx, func, &iff->else_instrs, context); + progress |= hlsl_transform_ir(ctx, func, &iff->then_block, context); + progress |= hlsl_transform_ir(ctx, func, &iff->else_block, context); } else if (instr->type == HLSL_IR_LOOP) - progress |= transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); + progress |= hlsl_transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context);
progress |= func(ctx, instr, context); } @@ -471,6 +573,44 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx return progress; }
+static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + bool res; + bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *) = context; + + switch(instr->type) + { + case HLSL_IR_LOAD: + res = func(ctx, &hlsl_ir_load(instr)->src, instr); + return res; + + case HLSL_IR_STORE: + res = func(ctx, &hlsl_ir_store(instr)->lhs, instr); + return res; + + case HLSL_IR_RESOURCE_LOAD: + res = func(ctx, &hlsl_ir_resource_load(instr)->resource, instr); + if (hlsl_ir_resource_load(instr)->sampler.var) + res |= func(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); + return res; + + case HLSL_IR_RESOURCE_STORE: + res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr); + return res; + + default: + return false; + } + return false; +} + +static bool transform_derefs(struct hlsl_ctx *ctx, + bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *), + struct hlsl_block *block) +{ + return hlsl_transform_ir(ctx, transform_instr_derefs, block, func); +} + struct recursive_call_ctx { const struct hlsl_ir_function_decl **backtrace; @@ -506,7 +646,7 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst return false; call_ctx->backtrace[call_ctx->count++] = decl;
- transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx); + hlsl_transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx);
--call_ctx->count;
@@ -516,21 +656,23 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst static void insert_early_return_break(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr) { - struct hlsl_ir_jump *jump; + struct hlsl_ir_node *iff, *jump; + struct hlsl_block then_block; struct hlsl_ir_load *load; - struct hlsl_ir_if *iff;
- if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) + hlsl_block_init(&then_block); + + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) return; list_add_after(&cf_instr->entry, &load->node.entry);
- if (!(iff = hlsl_new_if(ctx, &load->node, cf_instr->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) return; - list_add_after(&load->node.entry, &iff->node.entry); + hlsl_block_add_instr(&then_block, jump);
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, cf_instr->loc))) + if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, &cf_instr->loc))) return; - list_add_tail(&iff->then_instrs.instrs, &jump->node.entry); + list_add_after(&load->node.entry, &iff->entry); }
/* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ @@ -566,7 +708,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun * the CF instruction, shove it into an if block, and then lower that if * block. * - * (We could return a "did we make progress" boolean like transform_ir() + * (We could return a "did we make progress" boolean like hlsl_transform_ir() * and run this pass multiple times, but we already know the only block * that still needs to be addressed, so there's not much point.) * @@ -591,8 +733,8 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- has_early_return |= lower_return(ctx, func, &iff->then_instrs, in_loop); - has_early_return |= lower_return(ctx, func, &iff->else_instrs, in_loop); + has_early_return |= lower_return(ctx, func, &iff->then_block, in_loop); + has_early_return |= lower_return(ctx, func, &iff->else_block, in_loop);
if (has_early_return) { @@ -628,18 +770,17 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun else if (instr->type == HLSL_IR_JUMP) { struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); - struct hlsl_ir_constant *constant; - struct hlsl_ir_store *store; + struct hlsl_ir_node *constant, *store;
if (jump->type == HLSL_IR_JUMP_RETURN) { if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) return false; - list_add_before(&jump->node.entry, &constant->node.entry); + list_add_before(&jump->node.entry, &constant->entry);
- if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) + if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, constant))) return false; - list_add_after(&constant->node.entry, &store->node.entry); + list_add_after(&constant->entry, &store->entry);
has_early_return = true; if (in_loop) @@ -675,9 +816,9 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun else if (cf_instr) { struct list *tail = list_tail(&block->instrs); + struct hlsl_ir_node *not, *iff; + struct hlsl_block then_block; struct hlsl_ir_load *load; - struct hlsl_ir_node *not; - struct hlsl_ir_if *iff;
/* If we're in a loop, we should have used "break" instead. */ assert(!in_loop); @@ -685,21 +826,21 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun if (tail == &cf_instr->entry) return has_early_return;
- if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) - return false; - list_add_tail(&block->instrs, &load->node.entry); + hlsl_block_init(&then_block); + list_move_slice_tail(&then_block.instrs, list_next(&block->instrs, &cf_instr->entry), tail); + lower_return(ctx, func, &then_block, in_loop);
- if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc))) + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) return false; - list_add_tail(&block->instrs, &not->entry); + hlsl_block_add_instr(block, &load->node);
- if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, &cf_instr->loc))) return false; - list_add_tail(&block->instrs, &iff->node.entry); - - list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail); + hlsl_block_add_instr(block, not);
- lower_return(ctx, func, &iff->then_instrs, in_loop); + if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &cf_instr->loc))) + return false; + list_add_tail(&block->instrs, &iff->entry); }
return has_early_return; @@ -721,7 +862,6 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Function \"%s\" is not defined.", decl->func->name);
- list_init(&block.instrs); if (!hlsl_clone_block(ctx, &block, &decl->body)) return false; list_move_before(&call->node.entry, &block.instrs); @@ -731,6 +871,142 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * return true; }
+static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index, + const struct vkd3d_shader_location *loc) +{ + unsigned int dim_count = index->data_type->dimx; + struct hlsl_ir_node *store, *zero; + struct hlsl_ir_load *coords_load; + struct hlsl_deref coords_deref; + struct hlsl_ir_var *coords; + + assert(dim_count < 4); + + if (!(coords = hlsl_new_synthetic_var(ctx, "coords", + hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) + return NULL; + + hlsl_init_simple_deref_from_var(&coords_deref, coords); + if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) + return NULL; + list_add_after(&index->entry, &store->entry); + + if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) + return NULL; + list_add_after(&store->entry, &zero->entry); + + if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, zero, 1u << dim_count, loc))) + return NULL; + list_add_after(&zero->entry, &store->entry); + + if (!(coords_load = hlsl_new_var_load(ctx, coords, loc))) + return NULL; + list_add_after(&store->entry, &coords_load->node.entry); + + return &coords_load->node; +} + +/* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct + * record access before knowing if they will be used in the lhs of an assignment --in which case + * they are lowered into a deref-- or as the load of an element within a larger value. + * For the latter case, this pass takes care of lowering hlsl_ir_indexes into individual + * hlsl_ir_loads, or individual hlsl_ir_resource_loads, in case the indexing is a + * resource access. 
*/ +static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *val, *store; + struct hlsl_deref var_deref; + struct hlsl_ir_index *index; + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + + if (instr->type != HLSL_IR_INDEX) + return false; + index = hlsl_ir_index(instr); + val = index->val.node; + + if (hlsl_index_is_resource_access(index)) + { + unsigned int dim_count = hlsl_sampler_dim_count(val->data_type->sampler_dim); + struct hlsl_ir_node *coords = index->idx.node; + struct hlsl_resource_load_params params = {0}; + struct hlsl_ir_node *load; + + assert(coords->data_type->class == HLSL_CLASS_VECTOR); + assert(coords->data_type->base_type == HLSL_TYPE_UINT); + assert(coords->data_type->dimx == dim_count); + + if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) + return false; + + params.type = HLSL_RESOURCE_LOAD; + params.resource = val; + params.coords = coords; + params.format = val->data_type->e.resource_format; + + if (!(load = hlsl_new_resource_load(ctx, &params, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->entry); + hlsl_replace_node(instr, load); + return true; + } + + if (!(var = hlsl_new_synthetic_var(ctx, "index-val", val->data_type, &instr->loc))) + return false; + hlsl_init_simple_deref_from_var(&var_deref, var); + + if (!(store = hlsl_new_simple_store(ctx, var, val))) + return false; + list_add_before(&instr->entry, &store->entry); + + if (hlsl_index_is_noncontiguous(index)) + { + struct hlsl_ir_node *mat = index->val.node; + struct hlsl_deref row_deref; + unsigned int i; + + assert(!hlsl_type_is_row_major(mat->data_type)); + + if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) + return false; + hlsl_init_simple_deref_from_var(&row_deref, var); + + for (i = 0; i < mat->data_type->dimx; ++i) + { + struct hlsl_ir_node *c; + + if (!(c = hlsl_new_uint_constant(ctx, i, &instr->loc))) + return false; + 
list_add_before(&instr->entry, &c->entry); + + if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + + if (!(load = hlsl_new_load_index(ctx, &load->src, index->idx.node, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + + if (!(store = hlsl_new_store_index(ctx, &row_deref, c, &load->node, 0, &instr->loc))) + return false; + list_add_before(&instr->entry, &store->entry); + } + + if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + hlsl_replace_node(instr, &load->node); + } + else + { + if (!(load = hlsl_new_load_index(ctx, &var_deref, index->idx.node, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + hlsl_replace_node(instr, &load->node); + } + return true; +} + /* Lower casts from vec1 to vecN to swizzles. */ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -746,26 +1022,24 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
- if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && src_type->dimx == 1) + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1) { - struct hlsl_ir_node *replacement; - struct hlsl_ir_swizzle *swizzle; - struct hlsl_ir_expr *new_cast; + struct hlsl_ir_node *replacement, *new_cast, *swizzle;
dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); /* We need to preserve the cast since it might be doing more than just * turning the scalar into a vector. */ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) return false; - list_add_after(&cast->node.entry, &new_cast->node.entry); - replacement = &new_cast->node; + list_add_after(&cast->node.entry, &new_cast->entry); + replacement = new_cast;
if (dst_type->dimx != 1) { if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, replacement, &cast->node.loc))) return false; - list_add_after(&new_cast->node.entry, &swizzle->node.entry); - replacement = &swizzle->node; + list_add_after(&new_cast->entry, &swizzle->entry); + replacement = swizzle; }
hlsl_replace_node(&cast->node, replacement); @@ -949,9 +1223,9 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ path_node = deref->path[depth].node; subtype = hlsl_get_element_type_from_path_index(ctx, type, path_node);
- if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { - unsigned int idx = hlsl_ir_constant(path_node)->value[0].u; + unsigned int idx = hlsl_ir_constant(path_node)->value.u[0].u;
for (i = 0; i < idx; ++i) comp_start += hlsl_type_component_count(type->e.record.fields[i].type); @@ -966,7 +1240,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ if (path_node->type == HLSL_IR_CONSTANT) { copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, - depth + 1, hlsl_ir_constant(path_node)->value[0].u * subtype_comp_count, writemask); + depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, writemask); } else { @@ -1041,14 +1315,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count));
- if (instr->data_type->type != HLSL_CLASS_OBJECT) + if (instr->data_type->class != HLSL_CLASS_OBJECT) { - struct hlsl_ir_swizzle *swizzle_node; + struct hlsl_ir_node *swizzle_node;
if (!(swizzle_node = hlsl_new_swizzle(ctx, ret_swizzle, instr_component_count, new_instr, &instr->loc))) return false; - list_add_before(&instr->entry, &swizzle_node->node.entry); - new_instr = &swizzle_node->node; + list_add_before(&instr->entry, &swizzle_node->entry); + new_instr = swizzle_node; }
hlsl_replace_node(instr, new_instr); @@ -1061,9 +1335,9 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, { const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); const struct hlsl_ir_var *var = deref->var; - union hlsl_constant_value values[4] = {0}; - struct hlsl_ir_constant *cons; + struct hlsl_constant_value values = {0}; unsigned int start, count, i; + struct hlsl_ir_node *cons;
if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) return false; @@ -1076,21 +1350,17 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, || value->node->type != HLSL_IR_CONSTANT) return false;
- values[i] = hlsl_ir_constant(value->node)->value[value->component]; + values.u[i] = hlsl_ir_constant(value->node)->value.u[value->component]; }
- if (!(cons = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) + if (!(cons = hlsl_new_constant(ctx, instr->data_type, &values, &instr->loc))) return false; - cons->value[0] = values[0]; - cons->value[1] = values[1]; - cons->value[2] = values[2]; - cons->value[3] = values[3]; - list_add_before(&instr->entry, &cons->node.entry); + list_add_before(&instr->entry, &cons->entry);
TRACE("Load from %s[%u-%u]%s turned into a constant %p.\n", var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), cons);
- hlsl_replace_node(instr, &cons->node); + hlsl_replace_node(instr, cons); return true; }
@@ -1099,7 +1369,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, { struct hlsl_type *type = load->node.data_type;
- switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -1220,7 +1490,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s { unsigned int writemask = store->writemask;
- if (store->rhs.node->data_type->type == HLSL_CLASS_OBJECT) + if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) writemask = VKD3DSP_WRITEMASK_0; copy_propagation_set_value(var_def, start, writemask, store->rhs.node); } @@ -1270,8 +1540,8 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); - copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_block);
break; } @@ -1301,19 +1571,19 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if bool progress = false;
copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, &inner_state); + progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); copy_propagation_state_destroy(&inner_state);
copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, &inner_state); + progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); copy_propagation_state_destroy(&inner_state);
/* Ideally we'd invalidate the outer state looking at what was * touched in the two inner states, but this doesn't work for * loops (because we need to know what is invalidated in advance), * so we need copy_propagation_invalidate_from_block() anyway. */ - copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); - copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_block);
return progress; } @@ -1379,7 +1649,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b return progress; }
-static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) { struct copy_propagation_state state; bool progress; @@ -1471,7 +1741,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_
static bool is_vec1(const struct hlsl_type *type) { - return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); + return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1); }
static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -1505,21 +1775,20 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, const struct hlsl_ir_load *load, const unsigned int idx, struct hlsl_type *type) { - struct hlsl_ir_store *split_store; + struct hlsl_ir_node *split_store, *c; struct hlsl_ir_load *split_load; - struct hlsl_ir_constant *c;
if (!(c = hlsl_new_uint_constant(ctx, idx, &store->node.loc))) return false; - list_add_before(&store->node.entry, &c->node.entry); + list_add_before(&store->node.entry, &c->entry);
- if (!(split_load = hlsl_new_load_index(ctx, &load->src, &c->node, &store->node.loc))) + if (!(split_load = hlsl_new_load_index(ctx, &load->src, c, &store->node.loc))) return false; list_add_before(&store->node.entry, &split_load->node.entry);
- if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, &c->node, &split_load->node, 0, &store->node.loc))) + if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, c, &split_load->node, 0, &store->node.loc))) return false; - list_add_before(&store->node.entry, &split_store->node.entry); + list_add_before(&store->node.entry, &split_store->entry);
return true; } @@ -1538,7 +1807,7 @@ static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_ARRAY) + if (type->class != HLSL_CLASS_ARRAY) return false; element_type = type->e.array.type;
@@ -1575,7 +1844,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_STRUCT) + if (type->class != HLSL_CLASS_STRUCT) return false;
if (rhs->type != HLSL_IR_LOAD) @@ -1614,7 +1883,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_MATRIX) + if (type->class != HLSL_CLASS_MATRIX) return false; element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type));
@@ -1649,22 +1918,21 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
- if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) { - struct hlsl_ir_swizzle *swizzle; - struct hlsl_ir_expr *new_cast; + struct hlsl_ir_node *new_cast, *swizzle;
dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); /* We need to preserve the cast since it might be doing more than just * narrowing the vector. */ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) return false; - list_add_after(&cast->node.entry, &new_cast->node.entry); - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, &new_cast->node, &cast->node.loc))) + list_add_after(&cast->node.entry, &new_cast->entry); + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc))) return false; - list_add_after(&new_cast->node.entry, &swizzle->node.entry); + list_add_after(&new_cast->entry, &swizzle->entry);
- hlsl_replace_node(&cast->node, &swizzle->node); + hlsl_replace_node(&cast->node, swizzle); return true; }
@@ -1684,8 +1952,7 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
if (next_instr->type == HLSL_IR_SWIZZLE) { - struct hlsl_ir_swizzle *new_swizzle; - struct hlsl_ir_node *new_instr; + struct hlsl_ir_node *new_swizzle; unsigned int combined_swizzle;
combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, @@ -1695,9 +1962,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) return false;
- new_instr = &new_swizzle->node; - list_add_before(&instr->entry, &new_instr->entry); - hlsl_replace_node(instr, new_instr); + list_add_before(&instr->entry, &new_swizzle->entry); + hlsl_replace_node(instr, new_swizzle); return true; }
@@ -1725,6 +1991,81 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; }
+static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *idx; + struct hlsl_deref *deref; + struct hlsl_type *type; + unsigned int i; + + if (instr->type != HLSL_IR_LOAD) + return false; + + deref = &hlsl_ir_load(instr)->src; + assert(deref->var); + + if (deref->path_len == 0) + return false; + + type = deref->var->data_type; + for (i = 0; i < deref->path_len - 1; ++i) + type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); + + idx = deref->path[deref->path_len - 1].node; + + if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) + { + struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_constant_value value; + struct hlsl_ir_load *vector_load; + enum hlsl_ir_expr_op op; + + if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc))) + return false; + list_add_before(&instr->entry, &vector_load->node.entry); + + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) + return false; + list_add_before(&instr->entry, &swizzle->entry); + + value.u[0].u = 0; + value.u[1].u = 1; + value.u[2].u = 2; + value.u[3].u = 3; + if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &value, &instr->loc))) + return false; + list_add_before(&instr->entry, &c->entry); + + operands[0] = swizzle; + operands[1] = c; + if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, + hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + if (!(eq = hlsl_new_cast(ctx, eq, type, &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + op = HLSL_OP2_DOT; + if (type->dimx == 1) + op = type->base_type == HLSL_TYPE_BOOL ? 
HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; + + /* Note: We may be creating a DOT for bool vectors here, which we need to lower to + * LOGIC_OR + LOGIC_AND. */ + operands[0] = &vector_load->node; + operands[1] = eq; + if (!(dot = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) + return false; + list_add_before(&instr->entry, &dot->entry); + hlsl_replace_node(instr, dot); + + return true; + } + + return false; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -1737,7 +2078,7 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi if (expr->op != HLSL_OP2_DIV) return false;
- if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, instr->loc))) + if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, &instr->loc))) return false; list_add_before(&expr->node.entry, &rcp->entry); expr->op = HLSL_OP2_MUL; @@ -1758,7 +2099,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c if (expr->op != HLSL_OP1_SQRT) return false;
- if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, instr->loc))) + if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, &instr->loc))) return false; list_add_before(&expr->node.entry, &rsq->entry); expr->op = HLSL_OP1_RCP; @@ -1770,9 +2111,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c /* Lower DP2 to MUL + ADD */ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *mul, *replacement; - struct hlsl_ir_swizzle *add_x, *add_y; - struct hlsl_ir_constant *zero; + struct hlsl_ir_node *arg1, *arg2, *mul, *replacement, *zero, *add_x, *add_y; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) @@ -1791,11 +2130,11 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, &expr->node.loc))) return false; - list_add_before(&instr->entry, &zero->node.entry); + list_add_before(&instr->entry, &zero->entry);
operands[0] = arg1; operands[1] = arg2; - operands[2] = &zero->node; + operands[2] = zero;
if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc))) return false; @@ -1808,13 +2147,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co
if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) return false; - list_add_before(&instr->entry, &add_x->node.entry); + list_add_before(&instr->entry, &add_x->entry);
if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) return false; - list_add_before(&instr->entry, &add_y->node.entry); + list_add_before(&instr->entry, &add_y->entry);
- if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, &add_x->node, &add_y->node))) + if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, add_x, add_y))) return false; } list_add_before(&instr->entry, &replacement->entry); @@ -1836,7 +2175,7 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co if (expr->op != HLSL_OP1_ABS) return false;
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry);
@@ -1848,77 +2187,124 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co return true; }
-static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +/* Lower ROUND using FRC, ROUND(x) -> ((x + 0.5) - FRC(x + 0.5)). */ +static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_type *type = instr->data_type, *arg_type; - struct hlsl_ir_constant *zero; + struct hlsl_ir_node *arg, *neg, *sum, *frc, *half, *replacement; + struct hlsl_type *type = instr->data_type; + struct hlsl_constant_value half_value; + unsigned int i, component_count; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) return false; + expr = hlsl_ir_expr(instr); - if (expr->op != HLSL_OP1_CAST) - return false; - arg_type = expr->operands[0].node->data_type; - if (type->type > HLSL_CLASS_VECTOR || arg_type->type > HLSL_CLASS_VECTOR) + arg = expr->operands[0].node; + if (expr->op != HLSL_OP1_ROUND) return false; - if (type->base_type != HLSL_TYPE_BOOL) + + component_count = hlsl_type_component_count(type); + for (i = 0; i < component_count; ++i) + half_value.u[i].f = 0.5f; + if (!(half = hlsl_new_constant(ctx, type, &half_value, &expr->node.loc))) return false;
- /* Narrowing casts should have already been lowered. */ - assert(type->dimx == arg_type->dimx); + list_add_before(&instr->entry, &half->entry);
- zero = hlsl_new_constant(ctx, arg_type, &instr->loc); - if (!zero) + if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, half))) return false; - list_add_before(&instr->entry, &zero->node.entry); + list_add_before(&instr->entry, &sum->entry);
- expr->op = HLSL_OP2_NEQUAL; - hlsl_src_from_node(&expr->operands[1], &zero->node); + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, sum, &instr->loc))) + return false; + list_add_before(&instr->entry, &frc->entry);
+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, frc, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); + + if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, sum, neg))) + return false; + list_add_before(&instr->entry, &replacement->entry); + + hlsl_replace_node(instr, replacement); return true; }
-struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_type *type = instr->data_type, *arg_type; + static const struct hlsl_constant_value zero_value; + struct hlsl_ir_node *zero; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP1_CAST) + return false; + arg_type = expr->operands[0].node->data_type; + if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_BOOL) + return false; + + /* Narrowing casts should have already been lowered. */ + assert(type->dimx == arg_type->dimx); + + zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); + if (!zero) + return false; + list_add_before(&instr->entry, &zero->entry); + + expr->op = HLSL_OP2_NEQUAL; + hlsl_src_from_node(&expr->operands[1], zero); + + return true; +} + +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { - struct hlsl_ir_store *store; + struct hlsl_block then_block, else_block; + struct hlsl_ir_node *iff, *store; struct hlsl_ir_load *load; struct hlsl_ir_var *var; - struct hlsl_ir_if *iff;
assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type));
if (!(var = hlsl_new_synthetic_var(ctx, "conditional", if_true->data_type, &condition->loc))) return NULL;
- if (!(iff = hlsl_new_if(ctx, condition, condition->loc))) - return NULL; - list_add_tail(instrs, &iff->node.entry); + hlsl_block_init(&then_block); + hlsl_block_init(&else_block);
if (!(store = hlsl_new_simple_store(ctx, var, if_true))) return NULL; - list_add_tail(&iff->then_instrs.instrs, &store->node.entry); + hlsl_block_add_instr(&then_block, store);
if (!(store = hlsl_new_simple_store(ctx, var, if_false))) return NULL; - list_add_tail(&iff->else_instrs.instrs, &store->node.entry); + hlsl_block_add_instr(&else_block, store);
- if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) + if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) + return NULL; + list_add_tail(instrs, &iff->entry); + + if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) return NULL; list_add_tail(instrs, &load->node.entry);
- return load; + return &load->node; }
static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg; + struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_ir_expr *cast1, *cast2, *cast3; - struct hlsl_ir_constant *high_bit; + struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; - struct hlsl_ir_load *cond; unsigned int i;
if (instr->type != HLSL_IR_EXPR) @@ -1928,69 +2314,67 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_DIV) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false; - utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy);
if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) return false; list_add_before(&instr->entry, &xor->entry);
- if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - high_bit->value[i].u = 0x80000000; - list_add_before(&instr->entry, &high_bit->node.entry); + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &high_bit->entry);
- if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, &high_bit->node))) + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) return false; list_add_before(&instr->entry, &and->entry);
- if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; list_add_before(&instr->entry, &abs1->entry);
if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->node.entry); + list_add_before(&instr->entry, &cast1->entry);
- if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &abs2->entry);
if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->node.entry); + list_add_before(&instr->entry, &cast2->entry);
- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &cast1->node, &cast2->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) return false; list_add_before(&instr->entry, &div->entry);
if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->node.entry); + list_add_before(&instr->entry, &cast3->entry);
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) + if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) return false; - hlsl_replace_node(instr, &cond->node); + hlsl_replace_node(instr, cond);
return true; }
static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg; + struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_ir_expr *cast1, *cast2, *cast3; - struct hlsl_ir_constant *high_bit; + struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; - struct hlsl_ir_load *cond; unsigned int i;
if (instr->type != HLSL_IR_EXPR) @@ -2000,53 +2384,53 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false; - utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy);
- if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - high_bit->value[i].u = 0x80000000; - list_add_before(&instr->entry, &high_bit->node.entry); + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &high_bit->entry);
- if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, &high_bit->node))) + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) return false; list_add_before(&instr->entry, &and->entry);
- if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; list_add_before(&instr->entry, &abs1->entry);
if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->node.entry); + list_add_before(&instr->entry, &cast1->entry);
- if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &abs2->entry);
if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->node.entry); + list_add_before(&instr->entry, &cast2->entry);
- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, &cast1->node, &cast2->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) return false; list_add_before(&instr->entry, &div->entry);
if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->node.entry); + list_add_before(&instr->entry, &cast3->entry);
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) + if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) return false; - hlsl_replace_node(instr, &cond->node); + hlsl_replace_node(instr, cond);
return true; } @@ -2063,14 +2447,14 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void
if (expr->op != HLSL_OP1_ABS) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false;
arg = expr->operands[0].node;
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry);
@@ -2080,12 +2464,63 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void return true; }
+static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *arg1, *arg2, *mult, *comps[4] = {0}, *res; + struct hlsl_type *type = instr->data_type; + struct hlsl_ir_expr *expr; + unsigned int i, dimx; + bool is_bool; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + if (expr->op != HLSL_OP2_DOT) + return false; + + if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT + || type->base_type == HLSL_TYPE_BOOL) + { + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + assert(arg1->data_type->dimx == arg2->data_type->dimx); + dimx = arg1->data_type->dimx; + is_bool = type->base_type == HLSL_TYPE_BOOL; + + if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) + return false; + list_add_before(&instr->entry, &mult->entry); + + for (i = 0; i < dimx; ++i) + { + unsigned int s = hlsl_swizzle_from_writemask(1 << i); + + if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, mult, &instr->loc))) + return false; + list_add_before(&instr->entry, &comps[i]->entry); + } + + res = comps[0]; + for (i = 1; i < dimx; ++i) + { + if (!(res = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_OR : HLSL_OP2_ADD, res, comps[i]))) + return false; + list_add_before(&instr->entry, &res->entry); + } + + hlsl_replace_node(instr, res); + return true; + } + + return false; +} + static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc; + struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; struct hlsl_type *type = instr->data_type, *btype; - struct hlsl_ir_constant *one; - struct hlsl_ir_load *cond; + struct hlsl_constant_value one_value; struct hlsl_ir_expr *expr; unsigned int i;
@@ -2096,17 +2531,17 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_FLOAT) return false; - btype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_BOOL, type->dimx, type->dimy); + btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy);
if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) return false; list_add_before(&instr->entry, &mul1->entry);
- if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, instr->loc))) + if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) return false; list_add_before(&instr->entry, &neg1->entry);
@@ -2115,20 +2550,20 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr ge->data_type = btype; list_add_before(&instr->entry, &ge->entry);
- if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, instr->loc))) + if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &neg2->entry);
if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) return false;
- if (!(one = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - one->value[i].f = 1.0f; - list_add_before(&instr->entry, &one->node.entry); + one_value.u[i].f = 1.0f; + if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &one->entry);
- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &one->node, &cond->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) return false; list_add_before(&instr->entry, &div->entry);
@@ -2136,7 +2571,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; list_add_before(&instr->entry, &mul2->entry);
- if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, instr->loc))) + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) return false; list_add_before(&instr->entry, &frc->entry);
@@ -2144,7 +2579,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr hlsl_src_remove(&expr->operands[0]); hlsl_src_remove(&expr->operands[1]); hlsl_src_from_node(&expr->operands[0], frc); - hlsl_src_from_node(&expr->operands[1], &cond->node); + hlsl_src_from_node(&expr->operands[1], cond);
return true; } @@ -2155,6 +2590,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { case HLSL_IR_CONSTANT: case HLSL_IR_EXPR: + case HLSL_IR_INDEX: case HLSL_IR_LOAD: case HLSL_IR_RESOURCE_LOAD: case HLSL_IR_SWIZZLE: @@ -2204,8 +2640,8 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in if (instr->type == HLSL_IR_IF) { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - index = index_instructions(&iff->then_instrs, index); - index = index_instructions(&iff->else_instrs, index); + index = index_instructions(&iff->then_block, index); + index = index_instructions(&iff->else_block, index); } else if (instr->type == HLSL_IR_LOOP) { @@ -2262,9 +2698,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) continue; regset = hlsl_type_get_regset(var->data_type);
- if (var->reg_reservation.type) + if (var->reg_reservation.reg_type && var->regs[regset].bind_count) { - if (var->reg_reservation.type != get_regset_name(regset)) + if (var->reg_reservation.reg_type != get_regset_name(regset)) { struct vkd3d_string_buffer *type_string;
@@ -2277,8 +2713,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) else { var->regs[regset].allocated = true; - var->regs[regset].id = var->reg_reservation.index; - TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.type, var->reg_reservation.index); + var->regs[regset].id = var->reg_reservation.reg_index; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, + var->reg_reservation.reg_index + var->regs[regset].bind_count); } } } @@ -2286,9 +2724,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx)
/* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend - * to at least the range of the entire loop. Note that we don't need to do this - * for anonymous nodes, since there's currently no way to use a node which was - * calculated in an earlier iteration of the loop. */ + * to at least the range of the entire loop. We also do this for nodes, so that + * nodes produced before the loop have their temp register protected from being + * overridden after the last read within an iteration. */ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) { struct hlsl_ir_node *instr; @@ -2296,7 +2734,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { - const unsigned int var_last_read = loop_last ? max(instr->index, loop_last) : instr->index; + const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index;
switch (instr->type) { @@ -2311,9 +2749,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; - store->rhs.node->last_read = instr->index; + store->rhs.node->last_read = last_read; if (store->lhs.offset.node) - store->lhs.offset.node->last_read = instr->index; + store->lhs.offset.node->last_read = last_read; break; } case HLSL_IR_EXPR: @@ -2322,16 +2760,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop unsigned int i;
for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) - expr->operands[i].node->last_read = instr->index; + expr->operands[i].node->last_read = last_read; break; } case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- compute_liveness_recurse(&iff->then_instrs, loop_first, loop_last); - compute_liveness_recurse(&iff->else_instrs, loop_first, loop_last); - iff->condition.node->last_read = instr->index; + compute_liveness_recurse(&iff->then_block, loop_first, loop_last); + compute_liveness_recurse(&iff->else_block, loop_first, loop_last); + iff->condition.node->last_read = last_read; break; } case HLSL_IR_LOAD: @@ -2339,9 +2777,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_load *load = hlsl_ir_load(instr);
var = load->src.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->src.offset.node) - load->src.offset.node->last_read = instr->index; + load->src.offset.node->last_read = last_read; break; } case HLSL_IR_LOOP: @@ -2357,22 +2795,30 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
var = load->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->resource.offset.node) - load->resource.offset.node->last_read = instr->index; + load->resource.offset.node->last_read = last_read;
if ((var = load->sampler.var)) { - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->sampler.offset.node) - load->sampler.offset.node->last_read = instr->index; + load->sampler.offset.node->last_read = last_read; }
- load->coords.node->last_read = instr->index; + load->coords.node->last_read = last_read; if (load->texel_offset.node) - load->texel_offset.node->last_read = instr->index; + load->texel_offset.node->last_read = last_read; if (load->lod.node) - load->lod.node->last_read = instr->index; + load->lod.node->last_read = last_read; + if (load->ddx.node) + load->ddx.node->last_read = last_read; + if (load->ddy.node) + load->ddy.node->last_read = last_read; + if (load->sample_index.node) + load->sample_index.node->last_read = last_read; + if (load->cmp.node) + load->cmp.node->last_read = last_read; break; } case HLSL_IR_RESOURCE_STORE: @@ -2380,18 +2826,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
var = store->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (store->resource.offset.node) - store->resource.offset.node->last_read = instr->index; - store->coords.node->last_read = instr->index; - store->value.node->last_read = instr->index; + store->resource.offset.node->last_read = last_read; + store->coords.node->last_read = last_read; + store->value.node->last_read = last_read; break; } case HLSL_IR_SWIZZLE: { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
- swizzle->val.node->last_read = instr->index; + swizzle->val.node->last_read = last_read; + break; + } + case HLSL_IR_INDEX: + { + struct hlsl_ir_index *index = hlsl_ir_index(instr); + + index->val.node->last_read = last_read; + index->idx.node->last_read = last_read; break; } case HLSL_IR_CONSTANT: @@ -2426,127 +2880,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl compute_liveness_recurse(&entry_func->body, 0, 0); }
-struct liveness +struct register_allocator { - size_t size; - uint32_t reg_count; - struct + size_t count, capacity; + + /* Highest register index that has been allocated. + * Used to declare sm4 temp count. */ + uint32_t max_reg; + + struct allocation { - /* 0 if not live yet. */ - unsigned int last_read; - } *regs; + uint32_t reg; + unsigned int writemask; + unsigned int first_write, last_read; + } *allocations; };
-static unsigned int get_available_writemask(struct liveness *liveness, - unsigned int first_write, unsigned int component_idx, unsigned int reg_size) +static unsigned int get_available_writemask(const struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, uint32_t reg_idx) { - unsigned int i, writemask = 0, count = 0; + unsigned int writemask = VKD3DSP_WRITEMASK_ALL; + size_t i;
- for (i = 0; i < 4; ++i) + for (i = 0; i < allocator->count; ++i) { - if (liveness->regs[component_idx + i].last_read <= first_write) - { - writemask |= 1u << i; - if (++count == reg_size) - return writemask; - } + const struct allocation *allocation = &allocator->allocations[i]; + + /* We do not overlap if first write == last read: + * this is the case where we are allocating the result of that + * expression, e.g. "add r0, r0, r1". */ + + if (allocation->reg == reg_idx + && first_write < allocation->last_read && last_read > allocation->first_write) + writemask &= ~allocation->writemask; + + if (!writemask) + break; }
- return 0; + return writemask; }
-static bool resize_liveness(struct hlsl_ctx *ctx, struct liveness *liveness, size_t new_count) +static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, + uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) { - size_t old_capacity = liveness->size; + struct allocation *allocation;
- if (!hlsl_array_reserve(ctx, (void **)&liveness->regs, &liveness->size, new_count, sizeof(*liveness->regs))) - return false; + if (!hlsl_array_reserve(ctx, (void **)&allocator->allocations, &allocator->capacity, + allocator->count + 1, sizeof(*allocator->allocations))) + return;
- if (liveness->size > old_capacity) - memset(liveness->regs + old_capacity, 0, (liveness->size - old_capacity) * sizeof(*liveness->regs)); - return true; + allocation = &allocator->allocations[allocator->count++]; + allocation->reg = reg_idx; + allocation->writemask = writemask; + allocation->first_write = first_write; + allocation->last_read = last_read; + + allocator->max_reg = max(allocator->max_reg, reg_idx); }
/* reg_size is the number of register components to be reserved, while component_count is the number * of components for the register's writemask. In SM1, floats and vectors allocate the whole * register, even if they don't use it completely. */ -static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size, unsigned int component_count) { - unsigned int component_idx, writemask, i; struct hlsl_reg ret = {0}; + unsigned int writemask; + uint32_t reg_idx;
assert(component_count <= reg_size);
- for (component_idx = 0; component_idx < liveness->size; component_idx += 4) + for (reg_idx = 0;; ++reg_idx) { - if ((writemask = get_available_writemask(liveness, first_write, component_idx, reg_size))) + writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); + + if (vkd3d_popcount(writemask) >= reg_size) + { + writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); break; + } } - if (component_idx == liveness->size) - { - if (!resize_liveness(ctx, liveness, component_idx + 4)) - return ret; - writemask = (1u << reg_size) - 1; - } - for (i = 0; i < 4; ++i) - { - if (writemask & (1u << i)) - liveness->regs[component_idx + i].last_read = last_read; - } - ret.id = component_idx / 4; + + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); + + ret.id = reg_idx; + ret.bind_count = 1; ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.allocated = true; - liveness->reg_count = max(liveness->reg_count, ret.id + 1); return ret; }
-static bool is_range_available(struct liveness *liveness, unsigned int first_write, - unsigned int component_idx, unsigned int reg_size) +static bool is_range_available(const struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) { - unsigned int i; + uint32_t i;
- for (i = 0; i < reg_size; i += 4) + for (i = 0; i < (reg_size / 4); ++i) { - if (!get_available_writemask(liveness, first_write, component_idx + i, 4)) + if (get_available_writemask(allocator, first_write, last_read, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) return false; } return true; }
-static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size) { - unsigned int i, component_idx; struct hlsl_reg ret = {0}; + uint32_t reg_idx; + unsigned int i;
- for (component_idx = 0; component_idx < liveness->size; component_idx += 4) + for (reg_idx = 0;; ++reg_idx) { - if (is_range_available(liveness, first_write, component_idx, - min(reg_size, liveness->size - component_idx))) + if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) break; } - if (!resize_liveness(ctx, liveness, component_idx + reg_size)) - return ret;
- for (i = 0; i < reg_size; ++i) - liveness->regs[component_idx + i].last_read = last_read; - ret.id = component_idx / 4; + for (i = 0; i < reg_size / 4; ++i) + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); + + ret.id = reg_idx; + ret.bind_count = align(reg_size, 4) / 4; ret.allocated = true; - liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4)); return ret; }
-static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, const struct hlsl_type *type) { unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
- if (type->type <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, liveness, first_write, last_read, reg_size, type->dimx); + if (type->class <= HLSL_CLASS_VECTOR) + return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); else - return allocate_range(ctx, liveness, first_write, last_read, reg_size); + return allocate_range(ctx, allocator, first_write, last_read, reg_size); }
static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) @@ -2565,14 +3034,99 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
-static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) +static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + + if (regset == HLSL_REGSET_SAMPLERS) + { + enum hlsl_sampler_dim dim; + + assert(!load->sampler.var); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + dim = var->objects_usage[regset][index].sampler_dim; + if (dim != load->sampling_dim) + { + if (dim == HLSL_SAMPLER_DIM_GENERIC) + { + var->objects_usage[regset][index].first_sampler_dim_loc = instr->loc; + } + else + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, + "Inconsistent generic sampler usage dimension."); + hlsl_note(ctx, &var->objects_usage[regset][index].first_sampler_dim_loc, + VKD3D_SHADER_LOG_ERROR, "First use is here."); + return false; + } + } + var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + } + else + { + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; + } + } + + return false; +} + +static void calculate_resource_register_counts(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var; + struct hlsl_type *type; + unsigned int i, k; 
+ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + type = var->data_type; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + for (i = 0; i < type->reg_size[k]; ++i) + { + /* Samplers are only allocated until the last used one. */ + if (var->objects_usage[k][i].used) + var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + } + } + } +} + +static void allocate_variable_temp_register(struct hlsl_ctx *ctx, + struct hlsl_ir_var *var, struct register_allocator *allocator) { if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) return;
if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, liveness, + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, var->first_write, var->last_read, var->data_type);
TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', @@ -2580,15 +3134,20 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir } }
-static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) +static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct register_allocator *allocator) { struct hlsl_ir_node *instr;
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { + /* In SM4 all constants are inlined. */ + if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) + continue; + if (!instr->reg.allocated && instr->last_read) { - instr->reg = allocate_numeric_registers_for_type(ctx, liveness, instr->index, instr->last_read, + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); @@ -2599,8 +3158,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - allocate_temp_registers_recurse(ctx, &iff->then_instrs, liveness); - allocate_temp_registers_recurse(ctx, &iff->else_instrs, liveness); + allocate_temp_registers_recurse(ctx, &iff->then_block, allocator); + allocate_temp_registers_recurse(ctx, &iff->else_block, allocator); break; }
@@ -2609,21 +3168,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl struct hlsl_ir_load *load = hlsl_ir_load(instr); /* We need to at least allocate a variable for undefs. * FIXME: We should probably find a way to remove them instead. */ - allocate_variable_temp_register(ctx, load->src.var, liveness); + allocate_variable_temp_register(ctx, load->src.var, allocator); break; }
case HLSL_IR_LOOP: { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - allocate_temp_registers_recurse(ctx, &loop->body, liveness); + allocate_temp_registers_recurse(ctx, &loop->body, allocator); break; }
case HLSL_IR_STORE: { struct hlsl_ir_store *store = hlsl_ir_store(instr); - allocate_variable_temp_register(ctx, store->lhs.var, liveness); + allocate_variable_temp_register(ctx, store->lhs.var, allocator); break; }
@@ -2633,7 +3192,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl } }
-static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) +static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct register_allocator *allocator) { struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_ir_node *instr; @@ -2649,7 +3209,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b unsigned int x, y, i, writemask, end_reg; unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
- constant->reg = allocate_numeric_registers_for_type(ctx, liveness, 1, UINT_MAX, type); + constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, @@ -2662,7 +3222,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b defs->count = end_reg; }
- assert(type->type <= HLSL_CLASS_LAST_NUMERIC); + assert(type->class <= HLSL_CLASS_LAST_NUMERIC);
if (!(writemask = constant->reg.writemask)) writemask = (1u << type->dimx) - 1; @@ -2671,12 +3231,12 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b { for (x = 0, i = 0; x < 4; ++x) { - const union hlsl_constant_value *value; + const union hlsl_constant_value_component *value; float f;
if (!(writemask & (1u << x))) continue; - value = &constant->value[i++]; + value = &constant->value.u[i++];
switch (type->base_type) { @@ -2714,15 +3274,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - allocate_const_registers_recurse(ctx, &iff->then_instrs, liveness); - allocate_const_registers_recurse(ctx, &iff->else_instrs, liveness); + allocate_const_registers_recurse(ctx, &iff->then_block, allocator); + allocate_const_registers_recurse(ctx, &iff->else_block, allocator); break; }
case HLSL_IR_LOOP: { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - allocate_const_registers_recurse(ctx, &loop->body, liveness); + allocate_const_registers_recurse(ctx, &loop->body, allocator); break; }
@@ -2734,10 +3294,10 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b
static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { - struct liveness liveness = {0}; + struct register_allocator allocator = {0}; struct hlsl_ir_var *var;
- allocate_const_registers_recurse(ctx, &entry_func->body, &liveness); + allocate_const_registers_recurse(ctx, &entry_func->body, &allocator);
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -2748,12 +3308,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (reg_size == 0) continue;
- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &liveness, + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, 1, UINT_MAX, var->data_type); TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } } + + vkd3d_free(allocator.allocations); }
/* Simple greedy temporary register allocation pass that just assigns a unique @@ -2762,15 +3324,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * does not handle constants. */ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { - struct liveness liveness = {0}; - allocate_temp_registers_recurse(ctx, &entry_func->body, &liveness); - ctx->temp_count = liveness.reg_count; - vkd3d_free(liveness.regs); + struct register_allocator allocator = {0}; + + /* ps_1_* outputs are special and go in temp register 0. */ + if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + { + size_t i; + + for (i = 0; i < entry_func->parameters.count; ++i) + { + const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; + + if (var->is_output_semantic) + { + record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); + break; + } + } + } + + allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); + ctx->temp_count = allocator.max_reg + 1; + vkd3d_free(allocator.allocations); }
static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) { - static const char *shader_names[] = + static const char *const shader_names[] = { [VKD3D_SHADER_TYPE_PIXEL] = "Pixel", [VKD3D_SHADER_TYPE_VERTEX] = "Vertex", @@ -2791,7 +3371,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var D3DDECLUSAGE usage; uint32_t usage_idx;
- if (!hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) + /* ps_1_* outputs are special and go in temp register 0. */ + if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + return; + + builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, &reg); + if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -2800,8 +3385,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
if ((!output && !var->last_read) || (output && !var->first_write)) return; - - builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, &reg); } else { @@ -2827,6 +3410,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; + var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); @@ -2853,23 +3437,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3
LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) { - if (buffer->used_size && buffer->reservation.type == 'b' && buffer->reservation.index == index) + if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) return buffer; } return NULL; }
-static void calculate_buffer_offset(struct hlsl_ir_var *var) +static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) { + unsigned int var_reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + enum hlsl_type_class var_class = var->data_type->class; struct hlsl_buffer *buffer = var->buffer;
- buffer->size = hlsl_type_get_sm4_offset(var->data_type, buffer->size); + if (var->reg_reservation.offset_type == 'c') + { + if (var->reg_reservation.offset_index % 4) + { + if (var_class == HLSL_CLASS_MATRIX) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with matrix types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_ARRAY) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with array types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_STRUCT) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with struct types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_VECTOR) + { + unsigned int aligned_offset = hlsl_type_get_sm4_offset(var->data_type, var->reg_reservation.offset_index); + + if (var->reg_reservation.offset_index != aligned_offset) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with vector types cannot span multiple registers."); + } + } + var->buffer_offset = var->reg_reservation.offset_index; + } + else + { + var->buffer_offset = hlsl_type_get_sm4_offset(var->data_type, buffer->size); + }
- var->buffer_offset = buffer->size; TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); - buffer->size += var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); if (var->last_read) - buffer->used_size = buffer->size; + buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); +} + +static void validate_buffer_offsets(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var1, *var2; + struct hlsl_buffer *buffer; + + LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) + continue; + + buffer = var1->buffer; + if (!buffer->used_size) + continue; + + LIST_FOR_EACH_ENTRY(var2, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int var1_reg_size, var2_reg_size; + + if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) + continue; + + if (var1 == var2 || var1->buffer != var2->buffer) + continue; + + /* This is to avoid reporting the error twice for the same pair of overlapping variables. 
*/ + if (strcmp(var1->name, var2->name) >= 0) + continue; + + var1_reg_size = var1->data_type->reg_size[HLSL_REGSET_NUMERIC]; + var2_reg_size = var2->data_type->reg_size[HLSL_REGSET_NUMERIC]; + + if (var1->buffer_offset < var2->buffer_offset + var2_reg_size + && var2->buffer_offset < var1->buffer_offset + var1_reg_size) + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() reservation: Variables %s and %s overlap.", + var1->name, var2->name); + } + } + + LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + buffer = var1->buffer; + if (!buffer || buffer == ctx->globals_buffer) + continue; + + if (var1->reg_reservation.offset_type + || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) + buffer->manually_packed_elements = true; + else + buffer->automatically_packed_elements = true; + + if (buffer->manually_packed_elements && buffer->automatically_packed_elements) + { + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() must be specified for all the buffer elements, or none of them."); + break; + } + } }
static void allocate_buffers(struct hlsl_ctx *ctx) @@ -2880,15 +3558,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->type != HLSL_CLASS_OBJECT) + if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) { if (var->is_param) var->buffer = ctx->params_buffer;
- calculate_buffer_offset(var); + calculate_buffer_offset(ctx, var); } }
+ validate_buffer_offsets(ctx); + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) { if (!buffer->used_size) @@ -2896,28 +3576,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
if (buffer->type == HLSL_BUFFER_CONSTANT) { - if (buffer->reservation.type == 'b') + if (buffer->reservation.reg_type == 'b') { - const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.index); + const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index);
if (reserved_buffer && reserved_buffer != buffer) { hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple buffers bound to cb%u.", buffer->reservation.index); + "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, - "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.index); + "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); }
- buffer->reg.id = buffer->reservation.index; + buffer->reg.id = buffer->reservation.reg_index; + buffer->reg.bind_count = 1; buffer->reg.allocated = true; TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); } - else if (!buffer->reservation.type) + else if (!buffer->reservation.reg_type) { while (get_reserved_buffer(ctx, index)) ++index;
buffer->reg.id = index; + buffer->reg.bind_count = 1; buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; @@ -2939,13 +3621,29 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum uint32_t index) { const struct hlsl_ir_var *var; + unsigned int start, count;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { - if (!var->regs[regset].allocated) + if (var->reg_reservation.reg_type == get_regset_name(regset) + && var->data_type->reg_size[regset]) + { + /* Vars with a reservation prevent non-reserved vars from being + * bound there even if the reserved vars aren't used. */ + start = var->reg_reservation.reg_index; + count = var->data_type->reg_size[regset]; + } + else if (var->regs[regset].allocated) + { + start = var->regs[regset].id; + count = var->regs[regset].bind_count; + } + else + { continue; + }
- if (index == var->regs[regset].id) + if (start <= index && index < start + count) return var; } return NULL; @@ -2956,7 +3654,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; - uint32_t index;
if (regset == HLSL_REGSET_UAVS) { @@ -2968,19 +3665,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } }
- index = min_index; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->last_read || !var->data_type->reg_size[regset]) + unsigned int count = var->regs[regset].bind_count; + + if (count == 0) continue;
if (var->regs[regset].allocated) { - const struct hlsl_ir_var *reserved_object; - unsigned int index = var->regs[regset].id; - - reserved_object = get_allocated_object(ctx, regset, index); + const struct hlsl_ir_var *reserved_object, *last_reported = NULL; + unsigned int index, i;
if (var->regs[regset].id < min_index) { @@ -2988,28 +3683,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->regs[regset].id, min_index - 1); + continue; } - else if (reserved_object && reserved_object != var) + + for (i = 0; i < count; ++i) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple objects bound to %c%u.", regset_name, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Object '%s' is already bound to %c%u.", reserved_object->name, - regset_name, index); - } + index = var->regs[regset].id + i;
- var->regs[regset].id = var->reg_reservation.index; - var->regs[regset].allocated = true; - TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); + reserved_object = get_allocated_object(ctx, regset, index); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "Multiple variables bound to %c%u.", regset_name, index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, + "Variable '%s' is already bound to %c%u.", reserved_object->name, + regset_name, index); + last_reported = reserved_object; + } + } } else { - while (get_allocated_object(ctx, regset, index)) + unsigned int index = min_index; + unsigned int available = 0; + + while (available < count) + { + if (get_allocated_object(ctx, regset, index)) + available = 0; + else + ++available; ++index; + } + index -= count;
var->regs[regset].id = index; var->regs[regset].allocated = true; - TRACE("Allocated object to %c%u.\n", regset_name, index); + TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, + index + count); ++index; } } @@ -3034,12 +3745,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return false;
/* We should always have generated a cast to UINT. */ - assert(path_node->data_type->type == HLSL_CLASS_SCALAR + assert(path_node->data_type->class == HLSL_CLASS_SCALAR && path_node->data_type->base_type == HLSL_TYPE_UINT);
- idx = hlsl_ir_constant(path_node)->value[0].u; + idx = hlsl_ir_constant(path_node)->value.u[0].u;
- switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: if (idx >= type->dimx) @@ -3090,6 +3801,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; }
+bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + assert(regset <= HLSL_REGSET_LAST_OBJECT); + + *index = 0; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + assert(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return false; + + /* We should always have generated a cast to UINT. */ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->base_type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + + switch (type->class) + { + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + return false; + + *index += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_STRUCT: + *index += type->e.record.fields[idx].reg_offset[regset]; + break; + + default: + vkd3d_unreachable(); + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + assert(type->reg_size[regset] == 1); + return true; +} + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; @@ -3102,13 +3862,13 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref }
/* We should always have generated a cast to UINT. */ - assert(offset_node->data_type->type == HLSL_CLASS_SCALAR + assert(offset_node->data_type->class == HLSL_CLASS_SCALAR && offset_node->data_type->base_type == HLSL_TYPE_UINT);
if (offset_node->type != HLSL_IR_CONSTANT) return false;
- *offset = hlsl_ir_constant(offset_node)->value[0].u; + *offset = hlsl_ir_constant(offset_node)->value.u[0].u;
size = deref->var->data_type->reg_size[deref->offset_regset]; if (*offset >= size) @@ -3170,7 +3930,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a const struct hlsl_type *type = instr->data_type; const struct hlsl_ir_constant *constant;
- if (type->type != HLSL_CLASS_SCALAR + if (type->class != HLSL_CLASS_SCALAR || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) { struct vkd3d_string_buffer *string; @@ -3190,15 +3950,34 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } constant = hlsl_ir_constant(instr);
- if ((type->base_type == HLSL_TYPE_INT && constant->value[0].i <= 0) - || (type->base_type == HLSL_TYPE_UINT && !constant->value[0].u)) + if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) + || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, "Thread count must be a positive integer.");
- ctx->thread_count[i] = constant->value[0].u; + ctx->thread_count[i] = constant->value.u[0].u; } }
+static bool type_has_object_components(struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_OBJECT) + return true; + if (type->class == HLSL_CLASS_ARRAY) + return type_has_object_components(type->e.array.type); + if (type->class == HLSL_CLASS_STRUCT) + { + unsigned int i; + + for (i = 0; i < type->e.record.field_count; ++i) + { + if (type_has_object_components(type->e.record.fields[i].type)) + return true; + } + } + return false; +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -3209,10 +3988,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry unsigned int i; bool progress;
- list_move_head(&body->instrs, &ctx->static_initializers); + list_move_head(&body->instrs, &ctx->static_initializers.instrs);
memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); - transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); + hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); vkd3d_free(recursive_call_ctx.backtrace);
/* Avoid going into an infinite loop when processing call instructions. @@ -3222,7 +4001,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
lower_return(ctx, entry_func, body, false);
- while (transform_ir(ctx, lower_calls, body, NULL)); + while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); + + hlsl_transform_ir(ctx, lower_index_loads, body, NULL);
LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { @@ -3234,15 +4015,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { var = entry_func->parameters.vars[i];
- if (var->data_type->type == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { prepend_uniform_copy(ctx, &body->instrs, var); } else { - if (var->data_type->type != HLSL_CLASS_STRUCT && !var->semantic.name) + if (type_has_object_components(var->data_type)) + hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); + + if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT + && !var->semantic.name) + { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Parameter \"%s\" is missing a semantic.", var->name); + var->semantic.reported_missing = true; + }
if (var->storage_modifiers & HLSL_STORAGE_IN) prepend_input_var_copy(ctx, &body->instrs, var); @@ -3252,7 +4040,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } if (entry_func->return_var) { - if (entry_func->return_var->data_type->type != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) + if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name);
@@ -3274,60 +4062,71 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name);
- transform_ir(ctx, lower_broadcasts, body, NULL); - while (transform_ir(ctx, fold_redundant_casts, body, NULL)); + hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); + while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do { - progress = transform_ir(ctx, split_array_copies, body, NULL); - progress |= transform_ir(ctx, split_struct_copies, body, NULL); + progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); + progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); } while (progress); - transform_ir(ctx, split_matrix_copies, body, NULL); - - transform_ir(ctx, lower_narrowing_casts, body, NULL); - transform_ir(ctx, lower_casts_to_bool, body, NULL); - transform_ir(ctx, lower_int_division, body, NULL); - transform_ir(ctx, lower_int_modulus, body, NULL); - transform_ir(ctx, lower_int_abs, body, NULL); - transform_ir(ctx, lower_float_modulus, body, NULL); + hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); + + hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + hlsl_transform_ir(ctx, lower_int_division, body, NULL); + hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); + hlsl_transform_ir(ctx, lower_int_abs, body, NULL); + hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); do { - progress = transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= copy_propagation_execute(ctx, body); - progress |= transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= 
hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); } while (progress);
+ hlsl_transform_ir(ctx, lower_nonconstant_vector_derefs, body, NULL); + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + if (profile->major_version < 4) { - transform_ir(ctx, lower_division, body, NULL); - transform_ir(ctx, lower_sqrt, body, NULL); - transform_ir(ctx, lower_dot, body, NULL); + hlsl_transform_ir(ctx, lower_division, body, NULL); + hlsl_transform_ir(ctx, lower_sqrt, body, NULL); + hlsl_transform_ir(ctx, lower_dot, body, NULL); + hlsl_transform_ir(ctx, lower_round, body, NULL); }
if (profile->major_version < 2) { - transform_ir(ctx, lower_abs, body, NULL); + hlsl_transform_ir(ctx, lower_abs, body, NULL); }
- transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL);
/* TODO: move forward, remove when no longer needed */ - transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); - while (transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); + transform_derefs(ctx, replace_deref_path_with_offset, body); + while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL));
do compute_liveness(ctx, entry_func); - while (transform_ir(ctx, dce, body, NULL)); + while (hlsl_transform_ir(ctx, dce, body, NULL));
compute_liveness(ctx, entry_func);
if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx);
+ calculate_resource_register_counts(ctx); + allocate_register_reservations(ctx); + allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 3210bbd5712..301113c8477 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -22,7 +22,49 @@
#include "hlsl.h"
-static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) +static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fabsf(src->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fabs(src->value.u[k].d); + break; + + case HLSL_TYPE_INT: + /* C's abs(INT_MIN) is undefined, but HLSL evaluates this to INT_MIN */ + if (src->value.u[k].i == INT_MIN) + dst->u[k].i = INT_MIN; + else + dst->u[k].i = abs(src->value.u[k].i); + break; + + case HLSL_TYPE_UINT: + dst->u[k].u = src->value.u[k].u; + break; + + default: + FIXME("Fold abs() for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; +} + +static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { unsigned int k; uint32_t u; @@ -30,11 +72,11 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct double d; float f;
- if (dst->node.data_type->dimx != src->node.data_type->dimx - || dst->node.data_type->dimy != src->node.data_type->dimy) + if (dst_type->dimx != src->node.data_type->dimx + || dst_type->dimy != src->node.data_type->dimy) { FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), - debug_hlsl_type(ctx, dst->node.data_type)); + debug_hlsl_type(ctx, dst_type)); return false; }
@@ -44,61 +86,61 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - u = src->value[k].f; - i = src->value[k].f; - f = src->value[k].f; - d = src->value[k].f; + u = src->value.u[k].f; + i = src->value.u[k].f; + f = src->value.u[k].f; + d = src->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - u = src->value[k].d; - i = src->value[k].d; - f = src->value[k].d; - d = src->value[k].d; + u = src->value.u[k].d; + i = src->value.u[k].d; + f = src->value.u[k].d; + d = src->value.u[k].d; break;
case HLSL_TYPE_INT: - u = src->value[k].i; - i = src->value[k].i; - f = src->value[k].i; - d = src->value[k].i; + u = src->value.u[k].i; + i = src->value.u[k].i; + f = src->value.u[k].i; + d = src->value.u[k].i; break;
case HLSL_TYPE_UINT: - u = src->value[k].u; - i = src->value[k].u; - f = src->value[k].u; - d = src->value[k].u; + u = src->value.u[k].u; + i = src->value.u[k].u; + f = src->value.u[k].u; + d = src->value.u[k].u; break;
case HLSL_TYPE_BOOL: - u = !!src->value[k].u; - i = !!src->value[k].u; - f = !!src->value[k].u; - d = !!src->value[k].u; + u = !!src->value.u[k].u; + i = !!src->value.u[k].u; + f = !!src->value.u[k].u; + d = !!src->value.u[k].u; break;
default: vkd3d_unreachable(); }
- switch (dst->node.data_type->base_type) + switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = f; + dst->u[k].f = f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].d = d; + dst->u[k].d = d; break;
case HLSL_TYPE_INT: - dst->value[k].i = i; + dst->u[k].i = i; break;
case HLSL_TYPE_UINT: - dst->value[k].u = u; + dst->u[k].u = u; break;
case HLSL_TYPE_BOOL: @@ -110,9 +152,10 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct return true; }
-static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) +static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src->node.data_type->base_type); @@ -123,30 +166,30 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = -src->value[k].f; + dst->u[k].f = -src->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].d = -src->value[k].d; + dst->u[k].d = -src->value.u[k].d; break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = -src->value[k].u; + dst->u[k].u = -src->value.u[k].u; break;
default: - FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src1, - struct hlsl_ir_constant *src2) +static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); @@ -158,32 +201,32 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = src1->value[k].f + src2->value[k].f; + dst->u[k].f = src1->value.u[k].f + src2->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].d = src1->value[k].d + src2->value[k].d; + dst->u[k].d = src1->value.u[k].d + src2->value.u[k].d; break;
/* Handling HLSL_TYPE_INT through the unsigned field to avoid * undefined behavior with signed integers in C. */ case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u + src2->value[k].u; + dst->u[k].u = src1->value.u[k].u + src2->value.u[k].u; break;
default: - FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); @@ -195,32 +238,32 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = src1->value[k].f * src2->value[k].f; + dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].d = src1->value[k].d * src2->value[k].d; + dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u * src2->value[k].u; + dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; break;
default: - FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { unsigned int k;
- assert(dst->node.data_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->base_type == HLSL_TYPE_BOOL); assert(src1->node.data_type->base_type == src2->node.data_type->base_type);
for (k = 0; k < 4; ++k) @@ -229,270 +272,270 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].u = src1->value[k].f != src2->value[k].f; + dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].u = src1->value[k].d != src2->value[k].d; + dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: - dst->value[k].u = src1->value[k].u != src2->value[k].u; + dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; break;
default: vkd3d_unreachable(); }
- dst->value[k].u *= ~0u; + dst->u[k].u *= ~0u; } return true; }
-static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - if (ctx->profile->major_version >= 4 && src2->value[k].f == 0) + if (ctx->profile->major_version >= 4 && src2->value.u[k].f == 0) { - hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, "Floating point division by zero."); } - dst->value[k].f = src1->value[k].f / src2->value[k].f; - if (ctx->profile->major_version < 4 && !isfinite(dst->value[k].f)) + dst->u[k].f = src1->value.u[k].f / src2->value.u[k].f; + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Infinities and NaNs are not allowed by the shader model."); } break;
case HLSL_TYPE_DOUBLE: - if (src2->value[k].d == 0) + if (src2->value.u[k].d == 0) { - hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, "Floating point division by zero."); } - dst->value[k].d = src1->value[k].d / src2->value[k].d; + dst->u[k].d = src1->value.u[k].d / src2->value.u[k].d; break;
case HLSL_TYPE_INT: - if (src2->value[k].i == 0) + if (src2->value.u[k].i == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) - dst->value[k].i = INT_MIN; + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = INT_MIN; else - dst->value[k].i = src1->value[k].i / src2->value[k].i; + dst->u[k].i = src1->value.u[k].i / src2->value.u[k].i; break;
case HLSL_TYPE_UINT: - if (src2->value[k].u == 0) + if (src2->value.u[k].u == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - dst->value[k].u = src1->value[k].u / src2->value[k].u; + dst->u[k].u = src1->value.u[k].u / src2->value.u[k].u; break;
default: - FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - if (src2->value[k].i == 0) + if (src2->value.u[k].i == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, - "Division by zero."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) - dst->value[k].i = 0; + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = 0; else - dst->value[k].i = src1->value[k].i % src2->value[k].i; + dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; break;
case HLSL_TYPE_UINT: - if (src2->value[k].u == 0) + if (src2->value.u[k].u == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, - "Division by zero."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - dst->value[k].u = src1->value[k].u % src2->value[k].u; + dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; break;
default: - FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - dst->value[k].i = max(src1->value[k].i, src2->value[k].i); + dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); break;
case HLSL_TYPE_UINT: - dst->value[k].u = max(src1->value[k].u, src2->value[k].u); + dst->u[k].u = max(src1->value.u[k].u, src2->value.u[k].u); break;
default: - FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - dst->value[k].i = min(src1->value[k].i, src2->value[k].i); + dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); break;
case HLSL_TYPE_UINT: - dst->value[k].u = min(src1->value[k].u, src2->value[k].u); + dst->u[k].u = min(src1->value.u[k].u, src2->value.u[k].u); break;
default: - FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u ^ src2->value[k].u; + dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; break;
default: - FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u & src2->value[k].u; + dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; break;
default: - FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u | src2->value[k].u; + dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; break;
default: - FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } @@ -501,7 +544,9 @@ static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL, *res; + struct hlsl_ir_constant *arg1, *arg2 = NULL; + struct hlsl_constant_value res = {0}; + struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; unsigned int i; bool success; @@ -512,7 +557,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (!expr->operands[0].node) return false;
- if (instr->data_type->type > HLSL_CLASS_VECTOR) + if (instr->data_type->class > HLSL_CLASS_VECTOR) return false;
for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) @@ -521,64 +566,65 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, { if (expr->operands[i].node->type != HLSL_IR_CONSTANT) return false; - assert(expr->operands[i].node->data_type->type <= HLSL_CLASS_VECTOR); + assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); } } arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node);
- if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) - return false; - switch (expr->op) { + case HLSL_OP1_ABS: + success = fold_abs(ctx, &res, instr->data_type, arg1); + break; + case HLSL_OP1_CAST: - success = fold_cast(ctx, res, arg1); + success = fold_cast(ctx, &res, instr->data_type, arg1); break;
case HLSL_OP1_NEG: - success = fold_neg(ctx, res, arg1); + success = fold_neg(ctx, &res, instr->data_type, arg1); break;
case HLSL_OP2_ADD: - success = fold_add(ctx, res, arg1, arg2); + success = fold_add(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_MUL: - success = fold_mul(ctx, res, arg1, arg2); + success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_NEQUAL: - success = fold_nequal(ctx, res, arg1, arg2); + success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_DIV: - success = fold_div(ctx, res, arg1, arg2); + success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break;
case HLSL_OP2_MOD: - success = fold_mod(ctx, res, arg1, arg2); + success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break;
case HLSL_OP2_MAX: - success = fold_max(ctx, res, arg1, arg2); + success = fold_max(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_MIN: - success = fold_min(ctx, res, arg1, arg2); + success = fold_min(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_BIT_XOR: - success = fold_bit_xor(ctx, res, arg1, arg2); + success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_BIT_AND: - success = fold_bit_and(ctx, res, arg1, arg2); + success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_BIT_OR: - success = fold_bit_or(ctx, res, arg1, arg2); + success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); break;
default: @@ -589,20 +635,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
if (success) { - list_add_before(&expr->node.entry, &res->node.entry); - hlsl_replace_node(&expr->node, &res->node); - } - else - { - vkd3d_free(res); + if (!(res_node = hlsl_new_constant(ctx, instr->data_type, &res, &instr->loc))) + return false; + list_add_before(&expr->node.entry, &res_node->entry); + hlsl_replace_node(&expr->node, res_node); } return success; }
bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *value, *res; + struct hlsl_constant_value value; struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_constant *src; + struct hlsl_ir_node *dst; unsigned int i;
if (instr->type != HLSL_IR_SWIZZLE) @@ -610,15 +656,15 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst swizzle = hlsl_ir_swizzle(instr); if (swizzle->val.node->type != HLSL_IR_CONSTANT) return false; - value = hlsl_ir_constant(swizzle->val.node); - - if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) - return false; + src = hlsl_ir_constant(swizzle->val.node);
for (i = 0; i < swizzle->node.data_type->dimx; ++i) - res->value[i] = value->value[hlsl_swizzle_get_component(swizzle->swizzle, i)]; + value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; + + if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) + return false;
- list_add_before(&swizzle->node.entry, &res->node.entry); - hlsl_replace_node(&swizzle->node, &res->node); + list_add_before(&swizzle->node.entry, &dst->entry); + hlsl_replace_node(&swizzle->node, dst); return true; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c deleted file mode 100644 index 4a62d804ed6..00000000000 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c +++ /dev/null @@ -1,980 +0,0 @@ -/* - * HLSL code generation for DXBC shader models 1-3 - * - * Copyright 2019-2020 Zebediah Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "hlsl.h" -#include <stdio.h> - -bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) -{ - unsigned int i; - - static const struct - { - const char *semantic; - bool output; - enum vkd3d_shader_type shader_type; - unsigned int major_version; - D3DSHADER_PARAM_REGISTER_TYPE type; - DWORD offset; - } - register_table[] = - { - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, 
VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type - && ctx->profile->major_version == register_table[i].major_version) - { - *type = register_table[i].type; - if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) - *reg = register_table[i].offset; - else - *reg = semantic->index; - return true; - } - } - - return false; -} - -bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) -{ - static const struct - { - const char *name; - D3DDECLUSAGE usage; - } - semantics[] = - { - {"binormal", D3DDECLUSAGE_BINORMAL}, - {"blendindices", D3DDECLUSAGE_BLENDINDICES}, - {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, - {"color", D3DDECLUSAGE_COLOR}, - {"depth", D3DDECLUSAGE_DEPTH}, - {"fog", D3DDECLUSAGE_FOG}, - {"normal", D3DDECLUSAGE_NORMAL}, - {"position", D3DDECLUSAGE_POSITION}, - {"positiont", D3DDECLUSAGE_POSITIONT}, - {"psize", D3DDECLUSAGE_PSIZE}, - {"sample", D3DDECLUSAGE_SAMPLE}, - {"sv_depth", D3DDECLUSAGE_DEPTH}, - {"sv_position", D3DDECLUSAGE_POSITION}, - {"sv_target", D3DDECLUSAGE_COLOR}, - {"tangent", D3DDECLUSAGE_TANGENT}, - {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, - {"texcoord", D3DDECLUSAGE_TEXCOORD}, - }; - - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { - if 
(!ascii_strcasecmp(semantic->name, semantics[i].name)) - { - *usage = semantics[i].usage; - *usage_idx = semantic->index; - return true; - } - } - - return false; -} - -static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) -{ - if (type == VKD3D_SHADER_TYPE_VERTEX) - return D3DVS_VERSION(major, minor); - else - return D3DPS_VERSION(major, minor); -} - -static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) -{ - switch (type->type) - { - case HLSL_CLASS_ARRAY: - return sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else - return D3DXPC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3DXPC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3DXPC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3DXPC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3DXPC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->type); - vkd3d_unreachable(); - } -} - -static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3DXPT_FLOAT; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3DXPT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_SAMPLER; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_STRING: - return D3DXPT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_TEXTURE1D; 
- case HLSL_SAMPLER_DIM_2D: - return D3DXPT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_TEXTURE; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_VERTEXSHADER: - return D3DXPT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3DXPT_VOID; - default: - vkd3d_unreachable(); - } -} - -static const struct hlsl_type *get_array_type(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_type(type->e.array.type); - return type; -} - -static unsigned int get_array_size(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; - return 1; -} - -static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -{ - const struct hlsl_type *array_type = get_array_type(type); - unsigned int array_size = get_array_size(type); - unsigned int field_count = 0; - size_t fields_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; - - if (array_type->type == HLSL_CLASS_STRUCT) - { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm1_type(buffer, field->type, ctab_start); - } - - fields_offset = bytecode_get_size(buffer) - ctab_start; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset - ctab_start); - put_u32(buffer, field->type->bytecode_offset - ctab_start); - } - } - - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, 
type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -} - -static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) -{ - struct hlsl_ir_var *var; - - list_remove(&to_sort->extern_entry); - - LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) - { - if (strcmp(to_sort->name, var->name) < 0) - { - list_add_before(&var->extern_entry, &to_sort->extern_entry); - return; - } - } - - list_add_tail(sorted, &to_sort->extern_entry); -} - -static void sm1_sort_externs(struct hlsl_ctx *ctx) -{ - struct list sorted = LIST_INIT(sorted); - struct hlsl_ir_var *var, *next; - - LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - sm1_sort_extern(&sorted, var); - list_move_tail(&ctx->extern_vars, &sorted); -} - -static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - struct hlsl_ir_function_decl *entry_func) -{ - size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; - struct hlsl_ir_var *var; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - ++uniform_count; - - if (var->is_param && var->is_uniform) - { - struct vkd3d_string_buffer *name; - - if (!(name = hlsl_get_string_buffer(ctx))) - { - buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } - vkd3d_string_buffer_printf(name, "$%s", var->name); - vkd3d_free((char *)var->name); - var->name = hlsl_strdup(ctx, name->buffer); - hlsl_release_string_buffer(ctx, name); - } - } - } - - sm1_sort_externs(ctx); - - size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - - ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); - creator_offset = put_u32(buffer, 0); - put_u32(buffer, 
sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_u32(buffer, uniform_count); - put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ - put_u32(buffer, 0); /* FIXME: flags */ - put_u32(buffer, 0); /* FIXME: target string */ - - vars_start = bytecode_get_size(buffer); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - put_u32(buffer, 0); /* name */ - if (var->data_type->type == HLSL_CLASS_OBJECT - && (var->data_type->base_type == HLSL_TYPE_SAMPLER - || var->data_type->base_type == HLSL_TYPE_TEXTURE)) - { - assert(regset == HLSL_REGSET_SAMPLERS); - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); - put_u32(buffer, 1); - } - else - { - assert(regset == HLSL_REGSET_NUMERIC); - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); - put_u32(buffer, var->data_type->reg_size[regset] / 4); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ - } - } - - uniform_count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); - size_t name_offset; - - name_offset = put_string(buffer, var->name); - set_u32(buffer, var_offset, name_offset - ctab_start); - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); - ++uniform_count; - } - } - - offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(buffer, creator_offset, offset - ctab_start); - - ctab_end = bytecode_get_size(buffer); - set_u32(buffer, size_offset, 
vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); -} - -static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) -{ - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -} - -struct sm1_instruction -{ - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; - - struct sm1_dst_register - { - D3DSHADER_PARAM_REGISTER_TYPE type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; - unsigned int writemask; - uint32_t reg; - } dst; - - struct sm1_src_register - { - D3DSHADER_PARAM_REGISTER_TYPE type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; - unsigned int swizzle; - uint32_t reg; - } srcs[3]; - unsigned int src_count; - - unsigned int has_dst; -}; - -static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) -{ - assert(reg->writemask); - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); -} - -static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - const struct sm1_src_register *reg) -{ - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); -} - -static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct sm1_instruction *instr) -{ - uint32_t token = instr->opcode; - unsigned int i; - - if (ctx->profile->major_version > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - if (instr->has_dst) - write_sm1_dst_register(buffer, &instr->dst); - - for (i = 0; i < instr->src_count; ++i) - write_sm1_src_register(buffer, &instr->srcs[i]); -}; - -static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) -{ - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); -} - -static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct 
hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, - const struct hlsl_reg *src3) -{ - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .srcs[2].type = D3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - 
.srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, - .src_count = 1, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - unsigned int i, x; - - for (i = 0; i < ctx->constant_defs.count; ++i) - { - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { - .type = D3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = i, - }; - - if (ctx->profile->major_version > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - write_sm1_dst_register(buffer, ®); - for (x = 0; x < 4; ++x) - put_f32(buffer, ctx->constant_defs.values[i].f[x]); - } -} - -static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool output) -{ - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; - - if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) - { - usage = 0; - usage_idx = 0; - } - else - { - ret = 
hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); - assert(ret); - reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; - reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; - } - - token = D3DSIO_DCL; - if (ctx->profile->major_version > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - token = (1u << 31); - token |= usage << D3DSP_DCL_USAGE_SHIFT; - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - - reg.writemask = (1 << var->data_type->dimx) - 1; - write_sm1_dst_register(buffer, ®); -} - -static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - bool write_in = false, write_out = false; - struct hlsl_ir_var *var; - - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - write_in = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) - write_in = write_out = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) - write_in = true; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (write_in && var->is_input_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, false); - if (write_out && var->is_output_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, true); - } -} - -static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, - }; - - assert(instr->reg.allocated); - assert(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], 
sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - unsigned int i; - - for (i = 0; i < instr->data_type->dimx; ++i) - { - struct hlsl_reg src = arg1->reg, dst = instr->reg; - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); - write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); - } -} - -static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; - - assert(instr->reg.allocated); - - if (instr->data_type->base_type != HLSL_TYPE_FLOAT) - { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); - return; - } - - switch (expr->op) - { - case HLSL_OP1_ABS: - write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_NEG: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); - break; - - case HLSL_OP1_SAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); - break; - - case HLSL_OP1_RCP: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); - break; - - case HLSL_OP2_ADD: - write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: - write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: - write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MUL: - write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: - write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_OP3_DP2ADD: - write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "SM1 "%s" expression.", debug_hlsl_expr_op(expr->op)); - break; - } -} - -static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, 
const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_load *load = hlsl_ir_load(instr); - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, - }; - - assert(instr->reg.allocated); - - if (load->src.var->is_uniform) - { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) - { - if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, - false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - else - sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); - } - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_store *store = hlsl_ir_store(instr); - const struct hlsl_ir_node *rhs = store->rhs.node; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, - }; - - if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX) - { - FIXME("Matrix writemasks need to be lowered.\n"); - return; - } - - if 
(store->lhs.var->is_output_semantic) - { - if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, - true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { - assert(reg.allocated); - sm1_instr.dst.type = D3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else - sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; - } - else - assert(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), - .src_count = 1, - }; - - assert(instr->reg.allocated); - assert(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_function_decl *entry_func) -{ - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->type == HLSL_CLASS_MATRIX) - { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); - continue; - } - else if (instr->data_type->type == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); - break; - } - - assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - write_sm1_constant(ctx, buffer, instr); - break; - - case HLSL_IR_EXPR: - write_sm1_expr(ctx, buffer, instr); - break; - - case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); - break; - - case HLSL_IR_STORE: - write_sm1_store(ctx, buffer, instr); - break; - - case HLSL_IR_SWIZZLE: - write_sm1_swizzle(ctx, buffer, instr); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} - -int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -{ - struct vkd3d_bytecode_buffer buffer = {0}; - int ret; - - put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - - write_sm1_uniforms(ctx, &buffer, entry_func); - - write_sm1_constant_defs(ctx, &buffer); - write_sm1_semantic_dcls(ctx, &buffer); - write_sm1_instructions(ctx, &buffer, entry_func); - - put_u32(&buffer, D3DSIO_END); - - if (!(ret = buffer.status)) - { - out->code = buffer.data; - out->size = buffer.size; - } - return ret; -} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c deleted file mode 100644 index 553a75818e7..00000000000 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c +++ /dev/null @@ -1,2531 +0,0 @@ -/* - * HLSL code generation for DXBC shader models 4-5 - * - * Copyright 2019-2020 Zebediah Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free 
Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "hlsl.h" -#include <stdio.h> -#include "d3dcommon.h" -#include "sm4.h" - -static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); - -static bool type_is_integer(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return true; - - default: - return false; - } -} - -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) -{ - unsigned int i; - - static const struct - { - const char *semantic; - bool output; - enum vkd3d_shader_type shader_type; - enum vkd3d_sm4_swizzle_type swizzle_type; - enum vkd3d_sm4_register_type type; - bool has_idx; - } - register_table[] = - { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, - - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, - - /* Put sv_target in this table, instead of letting it fall through 
to - * default varying allocation, so that the register index matches the - * usage index. */ - {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type) - { - *type = register_table[i].type; - if (swizzle_type) - *swizzle_type = register_table[i].swizzle_type; - *has_idx = register_table[i].has_idx; - return true; - } - } - - return false; -} - -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3D_NAME *usage) -{ - unsigned int i; - - static const struct - { - const char *name; - bool output; - enum vkd3d_shader_type shader_type; - D3DDECLUSAGE usage; - } - semantics[] = - { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - - {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - - {"color", true, 
VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - - {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, - {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, - - {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - }; - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { - if (!ascii_strcasecmp(semantic->name, semantics[i].name) - && output == semantics[i].output - && ctx->profile->type == semantics[i].shader_type - && !ascii_strncasecmp(semantic->name, "sv_", 3)) - { - *usage = semantics[i].usage; - return true; - } - } - - if (!ascii_strncasecmp(semantic->name, "sv_", 3)) - return false; - - *usage = D3D_NAME_UNDEFINED; - return true; -} - -static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) -{ - struct vkd3d_bytecode_buffer buffer = {0}; - struct vkd3d_string_buffer *string; - const struct hlsl_ir_var *var; - size_t count_position; - unsigned int i; - bool ret; - - count_position = put_u32(&buffer, 0); - put_u32(&buffer, 8); /* unknown */ - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; - enum vkd3d_sm4_register_type type; - uint32_t usage_idx, reg_idx; - D3D_NAME usage; - bool has_idx; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - assert(ret); - if (usage == ~0u) - continue; - usage_idx = var->semantic.index; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) - { - reg_idx = has_idx ? 
var->semantic.index : ~0u; - } - else - { - assert(var->regs[HLSL_REGSET_NUMERIC].allocated); - type = VKD3D_SM4_RT_INPUT; - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - - use_mask = width; /* FIXME: accurately report use mask */ - if (output) - use_mask = 0xf ^ use_mask; - - /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ - if (usage >= 64) - usage = 0; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, usage_idx); - put_u32(&buffer, usage); - switch (var->data_type->base_type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); - break; - - case HLSL_TYPE_INT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); - break; - - default: - if ((string = hlsl_type_to_string(ctx, var->data_type))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid data type %s for semantic variable %s.", string->buffer, var->name); - hlsl_release_string_buffer(ctx, string); - put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); - } - put_u32(&buffer, reg_idx); - put_u32(&buffer, vkd3d_make_u16(width, use_mask)); - } - - i = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - const char *semantic = var->semantic.name; - size_t string_offset; - D3D_NAME usage; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - if (usage == ~0u) - continue; - - if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) - string_offset = put_string(&buffer, "SV_Target"); - else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) - string_offset = put_string(&buffer, "SV_Depth"); - else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) - string_offset = put_string(&buffer, "SV_Position"); - else - 
string_offset = put_string(&buffer, semantic); - set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); - } - - set_u32(&buffer, count_position, i); - - dxbc_writer_add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, buffer.data, buffer.size); -} - -static const struct hlsl_type *get_array_type(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_type(type->e.array.type); - return type; -} - -static unsigned int get_array_size(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; - return 1; -} - -static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -{ - switch (type->type) - { - case HLSL_CLASS_ARRAY: - return sm4_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3D_SVC_MATRIX_COLUMNS; - else - return D3D_SVC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3D_SVC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3D_SVC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3D_SVC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3D_SVC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->type); - vkd3d_unreachable(); - } -} - -static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - return D3D_SVT_BOOL; - case HLSL_TYPE_DOUBLE: - return D3D_SVT_DOUBLE; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3D_SVT_FLOAT; - case HLSL_TYPE_INT: - return D3D_SVT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3D_SVT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SVT_SAMPLERCUBE; - case 
HLSL_SAMPLER_DIM_GENERIC: - return D3D_SVT_SAMPLER; - default: - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_STRING: - return D3D_SVT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SVT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3D_SVT_TEXTURE; - default: - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_UINT: - return D3D_SVT_UINT; - case HLSL_TYPE_VERTEXSHADER: - return D3D_SVT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3D_SVT_VOID; - default: - vkd3d_unreachable(); - } -} - -static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) -{ - const struct hlsl_type *array_type = get_array_type(type); - const char *name = array_type->name ? array_type->name : "<unnamed>"; - const struct hlsl_profile_info *profile = ctx->profile; - unsigned int field_count = 0, array_size = 0; - size_t fields_offset = 0, name_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; - - if (profile->major_version >= 5) - name_offset = put_string(buffer, name); - - if (type->type == HLSL_CLASS_ARRAY) - array_size = get_array_size(type); - - if (array_type->type == HLSL_CLASS_STRUCT) - { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm4_type(ctx, buffer, field->type); - } - - fields_offset = bytecode_get_size(buffer); - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset); - put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); - } - } - 
- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); - - if (profile->major_version >= 5) - { - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, name_offset); - } -} - -static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_SAMPLER: - return D3D_SIT_SAMPLER; - case HLSL_TYPE_TEXTURE: - return D3D_SIT_TEXTURE; - case HLSL_TYPE_UAV: - return D3D_SIT_UAV_RWTYPED; - default: - vkd3d_unreachable(); - } -} - -static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) -{ - switch (type->e.resource_format->base_type) - { - case HLSL_TYPE_DOUBLE: - return D3D_RETURN_TYPE_DOUBLE; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3D_RETURN_TYPE_FLOAT; - - case HLSL_TYPE_INT: - return D3D_RETURN_TYPE_SINT; - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - return D3D_RETURN_TYPE_UINT; - - default: - vkd3d_unreachable(); - } -} - -static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) -{ - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SRV_DIMENSION_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SRV_DIMENSION_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SRV_DIMENSION_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SRV_DIMENSION_TEXTURECUBE; - case HLSL_SAMPLER_DIM_1DARRAY: - return D3D_SRV_DIMENSION_TEXTURE1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return D3D_SRV_DIMENSION_TEXTURE2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: - return D3D_SRV_DIMENSION_TEXTURE2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: - return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; - case 
HLSL_SAMPLER_DIM_CUBEARRAY: - return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; - default: - vkd3d_unreachable(); - } -} - -static int sm4_compare_extern_resources(const void *a, const void *b) -{ - const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; - const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; - enum hlsl_regset aa_regset, bb_regset; - - aa_regset = hlsl_type_get_regset(aa->data_type); - bb_regset = hlsl_type_get_regset(bb->data_type); - - if (aa_regset != bb_regset) - return aa_regset - bb_regset; - - return aa->regs[aa_regset].id - bb->regs[bb_regset].id; -} - -static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -{ - const struct hlsl_ir_var **extern_resources = NULL; - const struct hlsl_ir_var *var; - enum hlsl_regset regset; - size_t capacity = 0; - - *count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (!hlsl_type_is_resource(var->data_type)) - continue; - regset = hlsl_type_get_regset(var->data_type); - if (!var->regs[regset].allocated) - continue; - - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) - { - *count = 0; - return NULL; - } - - extern_resources[*count] = var; - ++*count; - } - - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); - return extern_resources; -} - -static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) -{ - unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - - static const uint16_t target_types[] = - { - 
0xffff, /* PIXEL */ - 0xfffe, /* VERTEX */ - 0x4753, /* GEOMETRY */ - 0x4853, /* HULL */ - 0x4453, /* DOMAIN */ - 0x4353, /* COMPUTE */ - }; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - resource_count += extern_resources_count; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) - { - ++cbuffer_count; - ++resource_count; - } - } - - put_u32(&buffer, cbuffer_count); - cbuffer_position = put_u32(&buffer, 0); - put_u32(&buffer, resource_count); - resource_position = put_u32(&buffer, 0); - put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), - target_types[profile->type])); - put_u32(&buffer, 0); /* FIXME: compilation flags */ - creator_position = put_u32(&buffer, 0); - - if (profile->major_version >= 5) - { - put_u32(&buffer, TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ - put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ - put_u32(&buffer, 0); /* unknown; possibly a null terminator */ - } - - /* Bound resources. 
*/ - - resources_offset = bytecode_get_size(&buffer); - set_u32(&buffer, resource_position, resources_offset); - - for (i = 0; i < extern_resources_count; ++i) - { - enum hlsl_regset regset; - uint32_t flags = 0; - - var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (var->reg_reservation.type) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(var->data_type)); - if (regset == HLSL_REGSET_SAMPLERS) - { - put_u32(&buffer, 0); - put_u32(&buffer, 0); - put_u32(&buffer, 0); - } - else - { - put_u32(&buffer, sm4_resource_format(var->data_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } - put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, 1); /* bind count */ - put_u32(&buffer, flags); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - uint32_t flags = 0; - - if (!cbuffer->reg.allocated) - continue; - - if (cbuffer->reservation.type) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - put_u32(&buffer, 0); /* return type */ - put_u32(&buffer, 0); /* dimension */ - put_u32(&buffer, 0); /* multisample count */ - put_u32(&buffer, cbuffer->reg.id); /* bind point */ - put_u32(&buffer, 1); /* bind count */ - put_u32(&buffer, flags); /* flags */ - } - - for (i = 0; i < extern_resources_count; ++i) - { - var = extern_resources[i]; - - string_offset = put_string(&buffer, var->name); - set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); - } - - /* Buffers. */ - - cbuffers_offset = bytecode_get_size(&buffer); - set_u32(&buffer, cbuffer_position, cbuffers_offset); - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - unsigned int var_count = 0; - - if (!cbuffer->reg.allocated) - continue; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - ++var_count; - } - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, var_count); - put_u32(&buffer, 0); /* variable offset */ - put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); - put_u32(&buffer, 0); /* FIXME: flags */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); - } - - i = 0; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); - } - - i = 0; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - size_t vars_start = bytecode_get_size(&buffer); - - if (!cbuffer->reg.allocated) - continue; - - set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - { - uint32_t flags = 0; - - if (var->last_read) - flags |= D3D_SVF_USED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, var->buffer_offset * sizeof(float)); - put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); - put_u32(&buffer, flags); - put_u32(&buffer, 0); /* type */ - put_u32(&buffer, 0); /* FIXME: default value */ - - if (profile->major_version >= 5) - { - put_u32(&buffer, 0); /* texture start */ - put_u32(&buffer, 0); /* texture count */ - put_u32(&buffer, 0); /* sampler start */ - put_u32(&buffer, 0); /* sampler count */ - } - } - } - - j = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - { - const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); - size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); - size_t string_offset = put_string(&buffer, var->name); - - set_u32(&buffer, var_offset, string_offset); - write_sm4_type(ctx, &buffer, var->data_type); - set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); - ++j; - } - } - } - - creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(&buffer, creator_position, creator_offset); - - dxbc_writer_add_section(dxbc, TAG_RDEF, buffer.data, buffer.size); - - vkd3d_free(extern_resources); -} - -static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) -{ - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return VKD3D_SM4_RESOURCE_TEXTURE_1D; - case HLSL_SAMPLER_DIM_2D: - return VKD3D_SM4_RESOURCE_TEXTURE_2D; - case HLSL_SAMPLER_DIM_3D: - return VKD3D_SM4_RESOURCE_TEXTURE_3D; - case HLSL_SAMPLER_DIM_CUBE: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; - case HLSL_SAMPLER_DIM_1DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; - case HLSL_SAMPLER_DIM_CUBEARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; - default: - vkd3d_unreachable(); - } -} - -struct sm4_instruction_modifier -{ - enum vkd3d_sm4_instruction_modifier type; - - union - { - struct - { - int u, v, w; - } aoffimmi; - } u; -}; - -static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) -{ - uint32_t word = 0; - - word |= VKD3D_SM4_MODIFIER_MASK & imod->type; - - switch (imod->type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: - assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); - assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); - assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); - word |= 
((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; - break; - - default: - vkd3d_unreachable(); - } - - return word; -} - -struct sm4_register -{ - enum vkd3d_sm4_register_type type; - uint32_t idx[2]; - unsigned int idx_count; - enum vkd3d_sm4_dimension dim; - uint32_t immconst_uint[4]; - unsigned int mod; -}; - -struct sm4_instruction -{ - enum vkd3d_sm4_opcode opcode; - - struct sm4_instruction_modifier modifiers[1]; - unsigned int modifier_count; - - struct sm4_dst_register - { - struct sm4_register reg; - unsigned int writemask; - } dsts[2]; - unsigned int dst_count; - - struct sm4_src_register - { - struct sm4_register reg; - enum vkd3d_sm4_swizzle_type swizzle_type; - unsigned int swizzle; - } srcs[4]; - unsigned int src_count; - - uint32_t idx[3]; - unsigned int idx_count; -}; - -static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, - unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, - const struct hlsl_deref *deref, const struct hlsl_type *data_type) -{ - const struct hlsl_ir_var *var = deref->var; - - if (var->is_uniform) - { - if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE) - { - reg->type = VKD3D_SM4_RT_RESOURCE; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV) - { - reg->type = VKD3D_SM5_RT_UAV; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (data_type->type == 
HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) - { - reg->type = VKD3D_SM4_RT_SAMPLER; - reg->dim = VKD3D_SM4_DIMENSION_NONE; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - - assert(data_type->type <= HLSL_CLASS_VECTOR); - reg->type = VKD3D_SM4_RT_CONSTBUFFER; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->buffer->reg.id; - reg->idx[1] = offset / 4; - reg->idx_count = 2; - *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); - } - } - else if (var->is_input_semantic) - { - bool has_idx; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, swizzle_type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - - if (has_idx) - { - reg->idx[0] = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_INPUT; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - else if (var->is_output_semantic) - { - bool has_idx; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, swizzle_type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - - if (has_idx) - { - reg->idx[0] = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - - if (reg->type == VKD3D_SM4_RT_DEPTHOUT) - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - else - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - *writemask = 
((1u << data_type->dimx) - 1) << (offset % 4); - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_OUTPUT; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - reg->idx[0] = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_TEMP; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } -} - -static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, - const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) -{ - unsigned int writemask; - - sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); - if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -} - -static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, - enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) -{ - assert(instr->reg.allocated); - reg->type = VKD3D_SM4_RT_TEMP; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = instr->reg.id; - reg->idx_count = 1; - *writemask = instr->reg.writemask; -} - -static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr) -{ - unsigned int swizzle_type; - - sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr); -} - -static void sm4_src_from_node(struct sm4_src_register *src, - const struct hlsl_ir_node *instr, unsigned int map_writemask) -{ - unsigned int writemask; - - sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); - if (src->swizzle_type == 
VKD3D_SM4_SWIZZLE_VEC4) - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -} - -static uint32_t sm4_encode_register(const struct sm4_register *reg) -{ - return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) - | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) - | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); -} - -static uint32_t sm4_register_order(const struct sm4_register *reg) -{ - uint32_t order = 1; - if (reg->type == VKD3D_SM4_RT_IMMCONST) - order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; - order += reg->idx_count; - if (reg->mod) - ++order; - return order; -} - -static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) -{ - uint32_t token = instr->opcode; - unsigned int size = 1, i, j; - - size += instr->modifier_count; - for (i = 0; i < instr->dst_count; ++i) - size += sm4_register_order(&instr->dsts[i].reg); - for (i = 0; i < instr->src_count; ++i) - size += sm4_register_order(&instr->srcs[i].reg); - size += instr->idx_count; - - token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); - - if (instr->modifier_count > 0) - token |= VKD3D_SM4_INSTRUCTION_MODIFIER; - put_u32(buffer, token); - - for (i = 0; i < instr->modifier_count; ++i) - { - token = sm4_encode_instruction_modifier(&instr->modifiers[i]); - if (instr->modifier_count > i + 1) - token |= VKD3D_SM4_INSTRUCTION_MODIFIER; - put_u32(buffer, token); - } - - for (i = 0; i < instr->dst_count; ++i) - { - token = sm4_encode_register(&instr->dsts[i].reg); - if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; - put_u32(buffer, token); - - for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) - put_u32(buffer, instr->dsts[i].reg.idx[j]); - } - - for (i = 0; i < instr->src_count; ++i) - { - token = sm4_encode_register(&instr->srcs[i].reg); - token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - token |= 
instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; - if (instr->srcs[i].reg.mod) - token |= VKD3D_SM4_EXTENDED_OPERAND; - put_u32(buffer, token); - - if (instr->srcs[i].reg.mod) - put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) - | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); - - for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) - put_u32(buffer, instr->srcs[i].reg.idx[j]); - - if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); - if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); - } - } - } - - for (j = 0; j < instr->idx_count; ++j) - put_u32(buffer, instr->idx[j]); -} - -static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - const struct hlsl_ir_node *texel_offset) -{ - struct sm4_instruction_modifier modif; - struct hlsl_ir_constant *offset; - - if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) - return false; - offset = hlsl_ir_constant(texel_offset); - - modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; - modif.u.aoffimmi.u = offset->value[0].i; - modif.u.aoffimmi.v = offset->value[1].i; - modif.u.aoffimmi.w = offset->value[2].i; - if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 - || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 - || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) - return false; - - instr->modifiers[instr->modifier_count++] = modif; - return true; -} - -static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) -{ - const struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - - .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, - .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, - .srcs[0].reg.idx_count = 
2, - .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, - .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), - .src_count = 1, - }; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - const struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - - .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr = - { - .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) - | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), - - .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {uav ? 
var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - - .idx[0] = sm4_resource_format(var->data_type) * 0x1111, - .idx_count = 1, - }; - - if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) - { - instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - const struct hlsl_profile_info *profile = ctx->profile; - const bool output = var->is_output_semantic; - D3D_NAME usage; - bool has_idx; - - struct sm4_instruction instr = - { - .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, - .dst_count = 1, - }; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) - { - if (has_idx) - { - instr.dsts[0].reg.idx[0] = var->semantic.index; - instr.dsts[0].reg.idx_count = 1; - } - else - { - instr.dsts[0].reg.idx_count = 0; - } - instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; - } - else - { - instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; - instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; - instr.dsts[0].reg.idx_count = 1; - instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } - - if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) - instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - - hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; - - if (var->is_input_semantic) - { - switch (usage) - { - case D3D_NAME_UNDEFINED: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? 
VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; - break; - - case D3D_NAME_INSTANCE_ID: - case D3D_NAME_PRIMITIVE_ID: - case D3D_NAME_VERTEX_ID: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; - break; - - default: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; - break; - } - - if (profile->type == VKD3D_SHADER_TYPE_PIXEL) - { - enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; - - if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) - mode = VKD3DSIM_CONSTANT; - - instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - } - } - else - { - if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; - else - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; - } - - switch (usage) - { - case D3D_NAME_COVERAGE: - case D3D_NAME_DEPTH: - case D3D_NAME_DEPTH_GREATER_EQUAL: - case D3D_NAME_DEPTH_LESS_EQUAL: - case D3D_NAME_TARGET: - case D3D_NAME_UNDEFINED: - break; - - default: - instr.idx_count = 1; - instr.idx[0] = usage; - break; - } - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_TEMPS, - - .idx = {temp_count}, - .idx_count = 1, - }; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, - - .idx = {thread_count[0], thread_count[1], thread_count[2]}, - .idx_count = 3, - }; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_RET, - }; - - 
write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); - instr.srcs[0].reg.mod = src_mod; - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; - - sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -/* dp# instructions don't map the swizzle. 
*/ -static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; - - sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_constant(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant) -{ - const unsigned int dimx = constant->node.data_type->dimx; - struct sm4_instruction instr; - struct sm4_register *reg = &instr.srcs[0].reg; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &constant->node); - instr.dst_count = 1; - - instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->type = VKD3D_SM4_RT_IMMCONST; - if (dimx == 1) - { - 
reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - reg->immconst_uint[0] = constant->value[0].u; - } - else - { - unsigned int i, j = 0; - - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - for (i = 0; i < 4; ++i) - { - if (instr.dsts[0].writemask & (1u << i)) - reg->immconst_uint[i] = constant->value[j++].u; - } - } - instr.src_count = 1, - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *texel_offset) -{ - bool uav = (resource_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr; - unsigned int dim_count; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = uav ? VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD; - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - - if (!uav) - { - /* Mipmap level is in the last component in the IR, but needs to be in the W - * component in the instruction. 
*/ - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - if (dim_count == 1) - instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); - if (dim_count == 2) - instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); - } - - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE; - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 3; - - write_sm4_instruction(buffer, &instr); -} - -static bool type_is_float(const struct hlsl_type *type) -{ - return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; -} - -static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, - const struct hlsl_ir_node *arg, uint32_t mask) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = 
VKD3D_SM4_OP_AND; - - sm4_dst_from_node(&instr.dsts[0], &expr->node); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); - instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; - instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - instr.srcs[1].reg.immconst_uint[0] = mask; - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_cast(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -{ - static const union - { - uint32_t u; - float f; - } one = { .f = 1.0 }; - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; - - /* Narrowing casts were already lowered. */ - assert(src_type->dimx == dst_type->dimx); - - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_INT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); - break; - - case 
HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_UINT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_HALF: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to half."); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); - break; - - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ - default: - vkd3d_unreachable(); - } -} - -static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - - sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_expr(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -{ - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_ir_node *arg2 = expr->operands[1].node; - const struct hlsl_type *dst_type = expr->node.data_type; - struct 
vkd3d_string_buffer *dst_type_string; - - assert(expr->node.reg.allocated); - - if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) - return; - - switch (expr->op) - { - case HLSL_OP1_ABS: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_BIT_NOT: - assert(type_is_integer(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_CAST: - write_sm4_cast(ctx, buffer, expr); - break; - - case HLSL_OP1_COS: - assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); - break; - - case HLSL_OP1_EXP2: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FLOOR: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FRACT: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOG2: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOGIC_NOT: - assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_NEG: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", 
dst_type_string->buffer); - } - break; - - case HLSL_OP1_REINTERPRET: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_OP1_ROUND: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_RSQ: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); - break; - - case HLSL_OP1_SAT: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV - | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), - &expr->node, arg1, 0); - break; - - case HLSL_OP1_SIN: - assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); - break; - - case HLSL_OP1_SQRT: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); - break; - - case HLSL_OP2_ADD: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_BIT_AND: - assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_OR: - assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_XOR: - assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_DIV: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - 
write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_DOT: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - switch (arg1->data_type->dimx) - { - case 4: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); - break; - - case 3: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); - break; - - case 2: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); - break; - - case 1: - default: - vkd3d_unreachable(); - } - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_EQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - assert(dst_type->base_type == HLSL_TYPE_BOOL); - - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between "%s" operands.", - debug_hlsl_type(ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_GEQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - assert(dst_type->base_type == HLSL_TYPE_BOOL); - - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between "%s" 
operands.", - debug_hlsl_type(ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_LESS: - { - const struct hlsl_type *src_type = arg1->data_type; - - assert(dst_type->base_type == HLSL_TYPE_BOOL); - - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between "%s" operands.", - debug_hlsl_type(ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_LOGIC_AND: - assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: - assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_MAX: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MIN: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, 
&expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MOD: - switch (dst_type->base_type) - { - case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MUL: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - /* Using IMUL instead of UMUL because we're taking the low - * bits, and the native compiler generates IMUL. */ - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_NEQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - assert(dst_type->base_type == HLSL_TYPE_BOOL); - - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between "%s" operands.", - debug_hlsl_type(ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_RSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); - } - - hlsl_release_string_buffer(ctx, dst_type_string); -} - -static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, - .src_count = 1, - }; - - assert(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(buffer, &instr); - - write_sm4_block(ctx, buffer, &iff->then_instrs); - - if (!list_empty(&iff->else_instrs.instrs)) - { - instr.opcode = VKD3D_SM4_OP_ELSE; - instr.src_count = 0; - write_sm4_instruction(buffer, &instr); - - write_sm4_block(ctx, buffer, &iff->else_instrs); - } - - instr.opcode = VKD3D_SM4_OP_ENDIF; - instr.src_count = 0; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_jump(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) -{ - struct sm4_instruction instr = {0}; - - switch (jump->type) - { - case HLSL_IR_JUMP_BREAK: - instr.opcode = VKD3D_SM4_OP_BREAK; - break; - - case HLSL_IR_JUMP_RETURN: - vkd3d_unreachable(); - - default: - hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); - return; - } - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &load->node); - instr.dst_count = 1; - - sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, load->node.data_type, instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static 
void write_sm4_loop(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_LOOP, - }; - - write_sm4_instruction(buffer, &instr); - - write_sm4_block(ctx, buffer, &loop->body); - - instr.opcode = VKD3D_SM4_OP_ENDLOOP; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) -{ - struct sm4_src_register *src; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - - instr.opcode = VKD3D_SM4_OP_GATHER4; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - if (ctx->profile->major_version < 5) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); - return; - } - instr.opcode = VKD3D_SM5_OP_GATHER4_PO; - sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); - } - } - - sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); - - src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); - src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; - src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; - src->swizzle = swizzle; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_resource_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load 
*load) -{ - const struct hlsl_type *resource_type = load->resource.var->data_type; - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; - - if (resource_type->type != HLSL_CLASS_OBJECT) - { - assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); - return; - } - - if (load->sampler.var) - { - const struct hlsl_type *sampler_type = load->sampler.var->data_type; - - if (sampler_type->type != HLSL_CLASS_OBJECT) - { - assert(sampler_type->type == HLSL_CLASS_ARRAY || sampler_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable."); - return; - } - assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); - assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC); - - if (!load->sampler.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - } - - if (!load->resource.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); - return; - } - - switch (load->load_type) - { - case HLSL_RESOURCE_LOAD: - write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, - coords, texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE: - if (!load->sampler.var) - { - hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); - return; - } - write_sm4_sample(ctx, buffer, resource_type, &load->node, - &load->resource, &load->sampler, coords, texel_offset); - break; - - case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Y, Y, 
Y, Y), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE_LOD: - hlsl_fixme(ctx, &load->node.loc, "SM4 sample-LOD expression."); - break; - } -} - -static void write_sm4_resource_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) -{ - const struct hlsl_type *resource_type = store->resource.var->data_type; - - if (resource_type->type != HLSL_CLASS_OBJECT) - { - assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable."); - return; - } - - if (!store->resource.var->is_uniform) - { - hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); - return; - } - - write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); -} - -static void write_sm4_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) -{ - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; - unsigned int writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); - instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_swizzle(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, 
const struct hlsl_ir_swizzle *swizzle) -{ - struct sm4_instruction instr; - unsigned int writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &swizzle->node); - instr.dst_count = 1; - - sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); - instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), - swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block) -{ - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->type == HLSL_CLASS_MATRIX) - { - hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); - break; - } - else if (instr->data_type->type == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); - break; - } - - assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); - break; - - case HLSL_IR_EXPR: - write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); - break; - - case HLSL_IR_IF: - write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); - break; - - case HLSL_IR_JUMP: - write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); - break; - - case HLSL_IR_LOAD: - write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); - break; - - case HLSL_IR_RESOURCE_LOAD: - write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); - break; - - case HLSL_IR_RESOURCE_STORE: - write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); - break; - - case HLSL_IR_LOOP: 
- write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); - break; - - case HLSL_IR_STORE: - write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); - break; - - case HLSL_IR_SWIZZLE: - write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} - -static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) -{ - const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; - unsigned int extern_resources_count, i; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - size_t token_count_position; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { - VKD3D_SM4_PS, - VKD3D_SM4_VS, - VKD3D_SM4_GS, - VKD3D_SM5_HS, - VKD3D_SM5_DS, - VKD3D_SM5_CS, - 0, /* EFFECT */ - 0, /* TEXTURE */ - VKD3D_SM4_LIB, - }; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); - token_count_position = put_u32(&buffer, 0); - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) - write_sm4_dcl_constant_buffer(&buffer, cbuffer); - } - - for (i = 0; i < extern_resources_count; ++i) - { - var = extern_resources[i]; - - if (var->data_type->base_type == HLSL_TYPE_SAMPLER) - write_sm4_dcl_sampler(&buffer, var); - else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) - write_sm4_dcl_texture(&buffer, var); - } - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) - write_sm4_dcl_semantic(ctx, &buffer, var); - } - - if 
(profile->type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(&buffer, ctx->thread_count); - - if (ctx->temp_count) - write_sm4_dcl_temps(&buffer, ctx->temp_count); - - write_sm4_block(ctx, &buffer, &entry_func->body); - - write_sm4_ret(&buffer); - - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - - dxbc_writer_add_section(dxbc, TAG_SHDR, buffer.data, buffer.size); - - vkd3d_free(extern_resources); -} - -int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -{ - struct dxbc_writer dxbc; - size_t i; - int ret; - - dxbc_writer_init(&dxbc); - - write_sm4_signature(ctx, &dxbc, false); - write_sm4_signature(ctx, &dxbc, true); - write_sm4_rdef(ctx, &dxbc); - write_sm4_shdr(ctx, entry_func, &dxbc); - - if (!(ret = ctx->result)) - ret = dxbc_writer_write(&dxbc, out); - for (i = 0; i < dxbc.section_count; ++i) - vkd3d_shader_free_shader_code(&dxbc.sections[i].data); - return ret; -} diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c new file mode 100644 index 00000000000..9eefb82c226 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -0,0 +1,1072 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; +} + +static bool shader_instruction_is_dcl(const struct vkd3d_shader_instruction *ins) +{ + return (VKD3DSIH_DCL <= ins->handler_idx && ins->handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) + || ins->handler_idx == VKD3DSIH_HS_DECLS; +} + +static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +{ + ins->handler_idx = VKD3DSIH_NOP; + ins->dst_count = 0; + ins->src_count = 0; + ins->dst = NULL; + ins->src = NULL; +} + +static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg, + unsigned int instance_id) +{ + unsigned int i; + + for (i = 0; i < reg->idx_count; ++i) + { + if (reg->idx[i].rel_addr && shader_register_is_phase_instance_id(&reg->idx[i].rel_addr->reg)) + { + reg->idx[i].rel_addr = NULL; + reg->idx[i].offset += instance_id; + } + } +} + +static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_instruction *ins, + unsigned int instance_id) +{ + struct vkd3d_shader_register *reg; + unsigned int i; + + for (i = 0; i < ins->src_count; ++i) + { + reg = (struct vkd3d_shader_register *)&ins->src[i].reg; + if (shader_register_is_phase_instance_id(reg)) + { + reg->type = VKD3DSPR_IMMCONST; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; + reg->idx[0].offset = ~0u; + reg->idx[0].rel_addr = NULL; + reg->idx[1].offset = ~0u; + reg->idx[1].rel_addr = NULL; + reg->idx[2].offset = ~0u; + reg->idx[2].rel_addr = NULL; + reg->idx_count = 0; + reg->immconst_type = VKD3D_IMMCONST_SCALAR; + reg->u.immconst_uint[0] = instance_id; 
+ continue; + } + shader_register_eliminate_phase_addressing(reg, instance_id); + } + + for (i = 0; i < ins->dst_count; ++i) + shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); +} + +struct hull_flattener +{ + struct vkd3d_shader_instruction_array instructions; + + unsigned int max_temp_count; + unsigned int temp_dcl_idx; + + unsigned int instance_count; + unsigned int phase_body_idx; + enum vkd3d_shader_opcode phase; +}; + +static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flattener) +{ + return flattener->phase == VKD3DSIH_HS_FORK_PHASE || flattener->phase == VKD3DSIH_HS_JOIN_PHASE; +} + +struct shader_phase_location +{ + unsigned int index; + unsigned int instance_count; + unsigned int instruction_count; +}; + +struct shader_phase_location_array +{ + /* Unlikely worst case: one phase for each component of each output register. */ + struct shader_phase_location locations[MAX_REG_OUTPUT * VKD3D_VEC4_SIZE]; + unsigned int count; +}; + +static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normaliser, + unsigned int index, struct shader_phase_location_array *locations) +{ + struct vkd3d_shader_instruction *ins = &normaliser->instructions.elements[index]; + struct shader_phase_location *loc; + bool b; + + if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + { + b = flattener_is_in_fork_or_join_phase(normaliser); + /* Reset the phase info. */ + normaliser->phase_body_idx = ~0u; + normaliser->phase = ins->handler_idx; + normaliser->instance_count = 1; + /* Leave the first occurrence and delete the rest. 
*/ + if (b) + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT + || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) + { + normaliser->instance_count = ins->declaration.count + !ins->declaration.count; + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( + &ins->declaration.dst.reg)) + { + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_TEMPS && normaliser->phase != VKD3DSIH_INVALID) + { + /* Leave only the first temp declaration and set it to the max count later. */ + if (!normaliser->max_temp_count) + normaliser->temp_dcl_idx = index; + else + vkd3d_shader_instruction_make_nop(ins); + normaliser->max_temp_count = max(normaliser->max_temp_count, ins->declaration.count); + return; + } + + if (normaliser->phase == VKD3DSIH_INVALID || shader_instruction_is_dcl(ins)) + return; + + if (normaliser->phase_body_idx == ~0u) + normaliser->phase_body_idx = index; + + if (ins->handler_idx == VKD3DSIH_RET) + { + vkd3d_shader_instruction_make_nop(ins); + if (locations->count >= ARRAY_SIZE(locations->locations)) + { + FIXME("Insufficient space for phase location.\n"); + return; + } + loc = &locations->locations[locations->count++]; + loc->index = normaliser->phase_body_idx; + loc->instance_count = normaliser->instance_count; + loc->instruction_count = index - normaliser->phase_body_idx; + } +} + +static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser, + struct shader_phase_location_array *locations) +{ + struct shader_phase_location *loc; + unsigned int i, j, k, end, count; + + for (i = 0, count = 0; i < locations->count; ++i) + count += (locations->locations[i].instance_count - 1) * locations->locations[i].instruction_count; + + if (!shader_instruction_array_reserve(&normaliser->instructions, 
normaliser->instructions.count + count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + end = normaliser->instructions.count; + normaliser->instructions.count += count; + + for (i = locations->count; i > 0; --i) + { + loc = &locations->locations[i - 1]; + j = loc->index + loc->instruction_count; + memmove(&normaliser->instructions.elements[j + count], &normaliser->instructions.elements[j], + (end - j) * sizeof(*normaliser->instructions.elements)); + end = j; + count -= (loc->instance_count - 1) * loc->instruction_count; + loc->index += count; + } + + for (i = 0, count = 0; i < locations->count; ++i) + { + loc = &locations->locations[i]; + /* Make a copy of the non-dcl instructions for each instance. */ + for (j = 1; j < loc->instance_count; ++j) + { + for (k = 0; k < loc->instruction_count; ++k) + { + if (!shader_instruction_array_clone_instruction(&normaliser->instructions, + loc->index + loc->instruction_count * j + k, loc->index + k)) + return VKD3D_ERROR_OUT_OF_MEMORY; + } + } + /* Replace each reference to the instance id with a constant instance id. 
*/ + for (j = 0; j < loc->instance_count; ++j) + { + for (k = 0; k < loc->instruction_count; ++k) + shader_instruction_eliminate_phase_instance_id( + &normaliser->instructions.elements[loc->index + loc->instruction_count * j + k], j); + } + } + + return VKD3D_OK; +} + +static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ + reg->type = reg_type; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; + reg->data_type = data_type; + reg->idx[0].offset = ~0u; + reg->idx[0].rel_addr = NULL; + reg->idx[1].offset = ~0u; + reg->idx[1].rel_addr = NULL; + reg->idx[2].offset = ~0u; + reg->idx[2].rel_addr = NULL; + reg->idx_count = idx_count; + reg->immconst_type = VKD3D_IMMCONST_SCALAR; +} + +static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) +{ + memset(ins, 0, sizeof(*ins)); + ins->handler_idx = handler_idx; +} + +enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) +{ + struct hull_flattener flattener = {*src_instructions}; + struct vkd3d_shader_instruction_array *instructions; + struct shader_phase_location_array locations; + enum vkd3d_result result = VKD3D_OK; + unsigned int i; + + instructions = &flattener.instructions; + + flattener.phase = VKD3DSIH_INVALID; + for (i = 0, locations.count = 0; i < instructions->count; ++i) + flattener_eliminate_phase_related_dcls(&flattener, i, &locations); + + if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) + return result; + + if (flattener.phase != VKD3DSIH_INVALID) + { + if (flattener.temp_dcl_idx) + instructions->elements[flattener.temp_dcl_idx].declaration.count = flattener.max_temp_count; + + if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + 
shader_instruction_init(&instructions->elements[instructions->count++], VKD3DSIH_RET); + } + + *src_instructions = flattener.instructions; + return result; +} + +struct control_point_normaliser +{ + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_opcode phase; + struct vkd3d_shader_src_param *outpointid_param; +}; + +static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( + struct vkd3d_shader_instruction_array *instructions) +{ + struct vkd3d_shader_src_param *rel_addr; + + if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) + return NULL; + + shader_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); + rel_addr->swizzle = 0; + rel_addr->modifiers = 0; + + return rel_addr; +} + +static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, + struct control_point_normaliser *normaliser) +{ + struct vkd3d_shader_register *reg = &dst_param->reg; + + if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) + { + /* The TPF reader validates idx_count. */ + assert(reg->idx_count == 1); + reg->idx[1] = reg->idx[0]; + /* The control point id param is implicit here. Avoid later complications by inserting it. 
+ reg->idx[0].offset = 0; + reg->idx[0].rel_addr = normaliser->outpointid_param; + ++reg->idx_count; + } +} + +static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, + enum vkd3d_shader_register_type reg_type, unsigned int idx_count) +{ + param->write_mask = e->mask; + param->modifiers = 0; + param->shift = 0; + shader_register_init(&param->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); +} + +static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, + const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst) +{ + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_dst_param *param; + const struct signature_element *e; + unsigned int i, count; + + for (i = 0, count = 1; i < s->element_count; ++i) + count += !!s->elements[i].used_mask; + + if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memmove(&normaliser->instructions.elements[dst + count], &normaliser->instructions.elements[dst], + (normaliser->instructions.count - dst) * sizeof(*normaliser->instructions.elements)); + normaliser->instructions.count += count; + + ins = &normaliser->instructions.elements[dst]; + shader_instruction_init(ins, VKD3DSIH_HS_CONTROL_POINT_PHASE); + ins->flags = 1; + ++ins; + + for (i = 0; i < s->element_count; ++i) + { + e = &s->elements[i]; + if (!e->used_mask) + continue; + + if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) + { + shader_instruction_init(ins, VKD3DSIH_DCL_INPUT_SIV); + param = &ins->declaration.register_semantic.reg; + ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); + } + else + { + shader_instruction_init(ins, VKD3DSIH_DCL_INPUT); + param = &ins->declaration.dst; + } + + shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); + 
param->reg.idx[0].offset = input_control_point_count; + param->reg.idx[1].offset = i; + + ++ins; + } + + return VKD3D_OK; +} + +enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( + struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) +{ + struct vkd3d_shader_instruction_array *instructions; + struct control_point_normaliser normaliser; + unsigned int input_control_point_count; + struct vkd3d_shader_instruction *ins; + enum vkd3d_result ret; + unsigned int i, j; + + if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions))) + { + ERR("Failed to allocate src param.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + normaliser.instructions = *src_instructions; + instructions = &normaliser.instructions; + normaliser.phase = VKD3DSIH_INVALID; + + for (i = 0; i < normaliser.instructions.count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser.phase = ins->handler_idx; + break; + default: + if (shader_instruction_is_dcl(ins)) + break; + for (j = 0; j < ins->dst_count; ++j) + shader_dst_param_normalise_outpointid((struct vkd3d_shader_dst_param *)&ins->dst[j], &normaliser); + break; + } + } + + normaliser.phase = VKD3DSIH_INVALID; + input_control_point_count = 1; + + for (i = 0; i < instructions->count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + input_control_point_count = ins->declaration.count; + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + *src_instructions = normaliser.instructions; + return VKD3D_OK; + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature, + input_control_point_count, i); + *src_instructions = normaliser.instructions; + return ret; + 
default: + break; + } + } + + *src_instructions = normaliser.instructions; + return VKD3D_OK; +} + +struct io_normaliser +{ + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_type shader_type; + struct shader_signature *input_signature; + struct shader_signature *output_signature; + struct shader_signature *patch_constant_signature; + + unsigned int max_temp_count; + unsigned int temp_dcl_idx; + + unsigned int instance_count; + unsigned int phase_body_idx; + enum vkd3d_shader_opcode phase; + unsigned int output_control_point_count; + + struct vkd3d_shader_src_param *outpointid_param; + + struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT]; + struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT]; + struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT]; + uint8_t input_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; + uint8_t output_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; + uint8_t pc_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; +}; + +static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; +} + +static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, + unsigned int reg_idx, unsigned int write_mask) +{ + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + struct signature_element *e = &signature->elements[i]; + if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx + && (e->mask & write_mask) == write_mask) + { + return i; + } + } + + /* Validated in the TPF reader. 
*/ + vkd3d_unreachable(); +} + +static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], + unsigned int register_idx, unsigned int write_mask) +{ + return range_map[register_idx][vkd3d_write_mask_get_component_idx(write_mask)]; +} + +static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], unsigned int register_idx, + unsigned int register_count, unsigned int write_mask, bool is_dcl_indexrange) +{ + unsigned int i, j, r, c, component_idx, component_count; + + assert(write_mask <= VKD3DSP_WRITEMASK_ALL); + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + component_count = vkd3d_write_mask_component_count(write_mask); + + assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); + + if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) + { + /* Validated in the TPF reader. */ + assert(range_map[register_idx][component_idx] != UINT8_MAX); + return; + } + if (range_map[register_idx][component_idx] == register_count) + { + /* Already done. This happens when fxc splits a register declaration by + * component(s). The dcl_indexrange instructions are split too. */ + return; + } + range_map[register_idx][component_idx] = register_count; + + for (i = 0; i < register_count; ++i) + { + r = register_idx + i; + for (j = !i; j < component_count; ++j) + { + c = component_idx + j; + /* A synthetic patch constant range which overlaps an existing range can start upstream of it + * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. + * The latter is validated in the TPF reader. 
*/ + assert(!range_map[r][c] || !is_dcl_indexrange); + range_map[r][c] = UINT8_MAX; + } + } +} + +static void io_normaliser_add_index_range(struct io_normaliser *normaliser, + const struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; + const struct vkd3d_shader_register *reg = &range->dst.reg; + unsigned int reg_idx, write_mask, element_idx; + const struct shader_signature *signature; + uint8_t (*range_map)[VKD3D_VEC4_SIZE]; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + range_map = normaliser->input_range_map; + signature = normaliser->input_signature; + break; + case VKD3DSPR_OUTCONTROLPOINT: + range_map = normaliser->output_range_map; + signature = normaliser->output_signature; + break; + case VKD3DSPR_OUTPUT: + if (!io_normaliser_is_in_fork_or_join_phase(normaliser)) + { + range_map = normaliser->output_range_map; + signature = normaliser->output_signature; + break; + } + /* fall through */ + case VKD3DSPR_PATCHCONST: + range_map = normaliser->pc_range_map; + signature = normaliser->patch_constant_signature; + break; + default: + /* Validated in the TPF reader. */ + vkd3d_unreachable(); + } + + reg_idx = reg->idx[reg->idx_count - 1].offset; + write_mask = range->dst.write_mask; + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + range_map_set_register_range(range_map, reg_idx, range->register_count, + signature->elements[element_idx].mask, true); +} + +static int signature_element_mask_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + int ret; + + return (ret = vkd3d_u32_compare(e->mask, f->mask)) ? 
ret : vkd3d_u32_compare(e->register_index, f->register_index); +} + +static bool sysval_semantics_should_merge(const struct signature_element *e, const struct signature_element *f) +{ + if (e->sysval_semantic < VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE + || e->sysval_semantic > VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) + return false; + + return e->sysval_semantic == f->sysval_semantic + /* Line detail and density must be merged together to match the SPIR-V array. + * This deletes one of the two sysvals, but these are not used. */ + || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET + && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) + || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN + && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET); +} + +/* Merge tess factor sysvals because they are an array in SPIR-V. */ +static void shader_signature_map_patch_constant_index_ranges(struct shader_signature *s, + uint8_t range_map[][VKD3D_VEC4_SIZE]) +{ + struct signature_element *e, *f; + unsigned int i, j, register_count; + + qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare); + + for (i = 0; i < s->element_count; i += register_count) + { + e = &s->elements[i]; + register_count = 1; + + if (!e->sysval_semantic) + continue; + + for (j = i + 1; j < s->element_count; ++j, ++register_count) + { + f = &s->elements[j]; + if (f->register_index != e->register_index + register_count || !sysval_semantics_should_merge(e, f)) + break; + } + if (register_count < 2) + continue; + + range_map_set_register_range(range_map, e->register_index, register_count, e->mask, false); + } +} + +static int signature_element_register_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + + return vkd3d_u32_compare(e->register_index, f->register_index); +} + +static int signature_element_index_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + + 
return vkd3d_u32_compare(e->sort_index, f->sort_index); +} + +static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE], + bool is_patch_constant) +{ + unsigned int i, j, element_count, new_count, register_count; + struct signature_element *elements; + struct signature_element *e, *f; + + element_count = s->element_count; + if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) + return false; + memcpy(elements, s->elements, element_count * sizeof(*elements)); + + qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); + + for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) + { + e = &elements[i]; + j = i + 1; + + if (e->register_index == ~0u) + continue; + + /* Do not merge if the register index will be relative-addressed. */ + if (range_map_get_register_count(range_map, e->register_index, e->mask) > 1) + continue; + + for (; j < element_count; ++j) + { + f = &elements[j]; + + /* Merge different components of the same register unless sysvals are different, + * or it will be relative-addressed. 
*/ + if (f->register_index != e->register_index || f->sysval_semantic != e->sysval_semantic + || range_map_get_register_count(range_map, f->register_index, f->mask) > 1) + break; + + TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, + e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); + assert(!(e->mask & f->mask)); + + e->mask |= f->mask; + e->used_mask |= f->used_mask; + e->semantic_index = min(e->semantic_index, f->semantic_index); + } + } + element_count = new_count; + vkd3d_free(s->elements); + s->elements = elements; + s->element_count = element_count; + + if (is_patch_constant) + shader_signature_map_patch_constant_index_ranges(s, range_map); + + for (i = 0, new_count = 0; i < element_count; i += register_count, elements[new_count++] = *e) + { + e = &elements[i]; + register_count = 1; + + if (e->register_index >= MAX_REG_OUTPUT) + continue; + + register_count = range_map_get_register_count(range_map, e->register_index, e->mask); + assert(register_count != UINT8_MAX); + register_count += !register_count; + + if (register_count > 1) + { + TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); + e->register_count = register_count; + } + } + element_count = new_count; + + /* Restoring the original order is required for sensible trace output. 
*/ + qsort(elements, element_count, sizeof(elements[0]), signature_element_index_compare); + + s->element_count = element_count; + + return true; +} + +static bool sysval_semantic_is_tess_factor(enum vkd3d_shader_sysval_semantic sysval_semantic) +{ + return sysval_semantic >= VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE + && sysval_semantic <= VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; +} + +static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, + unsigned int id_idx, unsigned int register_index) +{ + assert(id_idx < ARRAY_SIZE(reg->idx) - 1); + + /* For a relative-addressed register index, move the id up a slot to separate it from the address, + * because rel_addr can be replaced with a constant offset in some cases. */ + if (reg->idx[id_idx].rel_addr) + { + reg->idx[id_idx + 1].rel_addr = NULL; + reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset; + reg->idx[id_idx].offset -= register_index; + ++id_idx; + } + /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where + * tessellation level registers are merged into an array because they're an array in SPIR-V. 
*/ + else + { + ++id_idx; + memmove(&reg->idx[1], &reg->idx[0], id_idx * sizeof(reg->idx[0])); + reg->idx[0].rel_addr = NULL; + reg->idx[0].offset = reg->idx[id_idx].offset - register_index; + } + + return id_idx; +} + +static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl, + struct io_normaliser *normaliser) + { + unsigned int id_idx, reg_idx, write_mask, element_idx; + struct vkd3d_shader_register *reg = &dst_param->reg; + struct vkd3d_shader_dst_param **dcl_params; + const struct shader_signature *signature; + const struct signature_element *e; + + if ((reg->type == VKD3DSPR_OUTPUT && io_normaliser_is_in_fork_or_join_phase(normaliser)) + || reg->type == VKD3DSPR_PATCHCONST) + { + signature = normaliser->patch_constant_signature; + /* Convert patch constant outputs to the patch constant register type to avoid the need + * to convert compiler symbols when accessed as inputs in a later stage. */ + reg->type = VKD3DSPR_PATCHCONST; + dcl_params = normaliser->pc_dcl_params; + } + else if (reg->type == VKD3DSPR_OUTPUT || dst_param->reg.type == VKD3DSPR_COLOROUT) + { + signature = normaliser->output_signature; + dcl_params = normaliser->output_dcl_params; + } + else if (dst_param->reg.type == VKD3DSPR_INCONTROLPOINT || dst_param->reg.type == VKD3DSPR_INPUT) + { + signature = normaliser->input_signature; + dcl_params = normaliser->input_dcl_params; + } + else + { + return true; + } + + id_idx = reg->idx_count - 1; + reg_idx = reg->idx[id_idx].offset; + write_mask = dst_param->write_mask; + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + e = &signature->elements[element_idx]; + + dst_param->write_mask >>= vkd3d_write_mask_get_component_idx(e->mask); + if (is_io_dcl) + { + /* Validated in the TPF reader. */ + assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); + + if (dcl_params[element_idx]) + { + /* Merge split declarations into a single one. 
*/ + dcl_params[element_idx]->write_mask |= dst_param->write_mask; + /* Turn this into a nop. */ + return false; + } + else + { + dcl_params[element_idx] = dst_param; + } + } + + if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) + { + if (is_io_dcl) + { + /* Emit an array size for the control points for consistency with inputs. */ + reg->idx[0].offset = normaliser->output_control_point_count; + } + else + { + /* The control point id param. */ + assert(reg->idx[0].rel_addr); + } + id_idx = 1; + } + + if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) + { + if (is_io_dcl) + { + /* For control point I/O, idx 0 contains the control point count. + * Ensure it is moved up to the next slot. */ + reg->idx[id_idx].offset = reg->idx[0].offset; + reg->idx[0].offset = e->register_count; + ++id_idx; + } + else + { + id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + } + } + + /* Replace the register index with the signature element index */ + reg->idx[id_idx].offset = element_idx; + reg->idx_count = id_idx + 1; + + return true; +} + +static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_param, + struct io_normaliser *normaliser) +{ + unsigned int i, id_idx, reg_idx, write_mask, element_idx, component_idx; + struct vkd3d_shader_register *reg = &src_param->reg; + const struct shader_signature *signature; + const struct signature_element *e; + + /* Input/output registers from one phase can be used as inputs in + * subsequent phases. Specifically: + * + * - Control phase inputs are available as "vicp" in fork and join + * phases. + * - Control phase outputs are available as "vocp" in fork and join + * phases. + * - Fork phase patch constants are available as "vpc" in join + * phases. + * + * We handle "vicp" here by converting INCONTROLPOINT src registers to + * type INPUT so they match the control phase declarations. 
We handle + * "vocp" by converting OUTCONTROLPOINT registers to type OUTPUT. + * Merging fork and join phases handles "vpc". */ + + switch (reg->type) + { + case VKD3DSPR_PATCHCONST: + signature = normaliser->patch_constant_signature; + break; + case VKD3DSPR_INCONTROLPOINT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + reg->type = VKD3DSPR_INPUT; + /* fall through */ + case VKD3DSPR_INPUT: + signature = normaliser->input_signature; + break; + case VKD3DSPR_OUTCONTROLPOINT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + reg->type = VKD3DSPR_OUTPUT; + /* fall through */ + case VKD3DSPR_OUTPUT: + signature = normaliser->output_signature; + break; + default: + return; + } + + id_idx = reg->idx_count - 1; + reg_idx = reg->idx[id_idx].offset; + write_mask = VKD3DSP_WRITEMASK_0 << vkd3d_swizzle_get_component(src_param->swizzle, 0); + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + + e = &signature->elements[element_idx]; + if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) + id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + reg->idx[id_idx].offset = element_idx; + reg->idx_count = id_idx + 1; + + if ((component_idx = vkd3d_write_mask_get_component_idx(e->mask))) + { + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + if (vkd3d_swizzle_get_component(src_param->swizzle, i)) + src_param->swizzle -= component_idx << VKD3D_SHADER_SWIZZLE_SHIFT(i); + } +} + +static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, + struct io_normaliser *normaliser) +{ + struct vkd3d_shader_register *reg; + bool keep = true; + unsigned int i; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_INPUT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + { + reg = &ins->declaration.dst.reg; + /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their + * equivalents were declared earlier, but INCONTROLPOINT 
may be the first occurrence. */ + if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) + vkd3d_shader_instruction_make_nop(ins); + else if (reg->type == VKD3DSPR_INCONTROLPOINT) + reg->type = VKD3DSPR_INPUT; + } + /* fall through */ + case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_OUTPUT: + keep = shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser); + break; + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: + keep = shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, + normaliser); + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser->phase = ins->handler_idx; + memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); + memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); + memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); + break; + default: + if (shader_instruction_is_dcl(ins)) + break; + for (i = 0; i < ins->dst_count; ++i) + shader_dst_param_io_normalise((struct vkd3d_shader_dst_param *)&ins->dst[i], false, normaliser); + for (i = 0; i < ins->src_count; ++i) + shader_src_param_io_normalise((struct vkd3d_shader_src_param *)&ins->src[i], normaliser); + break; + } + + if (!keep) + shader_instruction_init(ins, VKD3DSIH_NOP); +} + +enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, + enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, + struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) +{ + struct io_normaliser normaliser = {*instructions}; + struct vkd3d_shader_instruction *ins; + bool has_control_point_phase; + unsigned int i, j; + + normaliser.phase = VKD3DSIH_INVALID; + normaliser.shader_type = shader_type; + 
normaliser.input_signature = input_signature; + normaliser.output_signature = output_signature; + normaliser.patch_constant_signature = patch_constant_signature; + + for (i = 0, has_control_point_phase = false; i < instructions->count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + normaliser.output_control_point_count = ins->declaration.count; + break; + case VKD3DSIH_DCL_INDEX_RANGE: + io_normaliser_add_index_range(&normaliser, ins); + vkd3d_shader_instruction_make_nop(ins); + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + has_control_point_phase = true; + /* fall through */ + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser.phase = ins->handler_idx; + break; + default: + break; + } + } + + if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase) + { + /* Inputs and outputs must match for the default phase, so merge ranges must match too. */ + for (i = 0; i < MAX_REG_OUTPUT; ++i) + { + for (j = 0; j < VKD3D_VEC4_SIZE; ++j) + { + if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j]) + normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; + else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) + normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; + else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); + } + } + } + + if (!shader_signature_merge(input_signature, normaliser.input_range_map, false) + || !shader_signature_merge(output_signature, normaliser.output_range_map, false) + || !shader_signature_merge(patch_constant_signature, normaliser.pc_range_map, true)) + { + *instructions = normaliser.instructions; + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + normaliser.phase = VKD3DSIH_INVALID; + for (i = 0; i < normaliser.instructions.count; ++i) + shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], 
&normaliser); + + *instructions = normaliser.instructions; + return VKD3D_OK; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index bb5a6b61de1..94079696280 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -41,6 +41,7 @@ static void update_location(struct preproc_ctx *ctx); %option bison-locations %option extra-type="struct preproc_ctx *" %option never-interactive +%option nodefault %option noinput %option nounput %option noyy_top_state @@ -75,6 +76,7 @@ INT_SUFFIX [uUlL]{0,2} <C_COMMENT>"*/" {yy_pop_state(yyscanner);} <C_COMMENT,CXX_COMMENT><<EOF>> {yy_pop_state(yyscanner);} <C_COMMENT,CXX_COMMENT>. {} +<C_COMMENT>\n {}
<ERROR>(\\{NEWLINE}|[^\n])* {return T_STRING;}
@@ -176,9 +178,9 @@ INT_SUFFIX [uUlL]{0,2} return T_NEWLINE; }
-<INITIAL>{WS}+ {} +<INITIAL,INCLUDE,LINE>{WS}+ {} <INITIAL>[-()\[\]{},+!*/<>&|^?:] {return yytext[0];} -<INITIAL>. {return T_TEXT;} +<INITIAL,INCLUDE,LINE>. {return T_TEXT;}
%%
diff --git a/libs/vkd3d/libs/vkd3d-shader/sm4.h b/libs/vkd3d/libs/vkd3d-shader/sm4.h deleted file mode 100644 index 5ec4ee17e27..00000000000 --- a/libs/vkd3d/libs/vkd3d-shader/sm4.h +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright 2009 Henri Verbeet for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#ifndef __VKD3D_SM4_H -#define __VKD3D_SM4_H - -#define VKD3D_SM4_PS 0x0000u -#define VKD3D_SM4_VS 0x0001u -#define VKD3D_SM4_GS 0x0002u -#define VKD3D_SM5_HS 0x0003u -#define VKD3D_SM5_DS 0x0004u -#define VKD3D_SM5_CS 0x0005u -#define VKD3D_SM4_LIB 0xfff0u - -#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) - -#define VKD3D_SM4_MODIFIER_MASK 0x3fu - -#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 -#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) - -#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 -#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) - -#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 -#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) - -#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 -#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) -#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 -#define 
VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) -#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 -#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) - -#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 -#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) - -#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 -#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) - -#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 -#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) - -#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 -#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) - -#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 -#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) - -#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 -#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) - -#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 -#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) - -#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 -#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) - -#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 -#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) - -#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 -#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) - -#define VKD3D_SM5_PRECISE_SHIFT 19 -#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) - -#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 -#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) - -#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 -#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu - -#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 -#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) - -#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 -#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << 
VKD3D_SM5_SYNC_FLAGS_SHIFT) - -#define VKD3D_SM5_TESSELLATOR_SHIFT 11 -#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) - -#define VKD3D_SM4_OPCODE_MASK 0xff - -#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) - -#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu - -#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 -#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) - -#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 -#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) - -#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 -#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) - -#define VKD3D_SM4_ADDRESSING_SHIFT2 28 -#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) - -#define VKD3D_SM4_ADDRESSING_SHIFT1 25 -#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) - -#define VKD3D_SM4_ADDRESSING_SHIFT0 22 -#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) - -#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 -#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) - -#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 -#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) - -#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 -#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) - -#define VKD3D_SM4_DIMENSION_SHIFT 0 -#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) - -#define VKD3D_SM4_WRITEMASK_SHIFT 4 -#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) - -#define VKD3D_SM4_SWIZZLE_SHIFT 4 -#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) - -#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) -#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) - -#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 -#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 - -#define 
VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 - -#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) - -#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) - -/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ -#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 - -enum vkd3d_sm4_opcode -{ - VKD3D_SM4_OP_ADD = 0x00, - VKD3D_SM4_OP_AND = 0x01, - VKD3D_SM4_OP_BREAK = 0x02, - VKD3D_SM4_OP_BREAKC = 0x03, - VKD3D_SM4_OP_CASE = 0x06, - VKD3D_SM4_OP_CONTINUE = 0x07, - VKD3D_SM4_OP_CONTINUEC = 0x08, - VKD3D_SM4_OP_CUT = 0x09, - VKD3D_SM4_OP_DEFAULT = 0x0a, - VKD3D_SM4_OP_DERIV_RTX = 0x0b, - VKD3D_SM4_OP_DERIV_RTY = 0x0c, - VKD3D_SM4_OP_DISCARD = 0x0d, - VKD3D_SM4_OP_DIV = 0x0e, - VKD3D_SM4_OP_DP2 = 0x0f, - VKD3D_SM4_OP_DP3 = 0x10, - VKD3D_SM4_OP_DP4 = 0x11, - VKD3D_SM4_OP_ELSE = 0x12, - VKD3D_SM4_OP_EMIT = 0x13, - VKD3D_SM4_OP_ENDIF = 0x15, - VKD3D_SM4_OP_ENDLOOP = 0x16, - VKD3D_SM4_OP_ENDSWITCH = 0x17, - VKD3D_SM4_OP_EQ = 0x18, - VKD3D_SM4_OP_EXP = 0x19, - VKD3D_SM4_OP_FRC = 0x1a, - VKD3D_SM4_OP_FTOI = 0x1b, - VKD3D_SM4_OP_FTOU = 0x1c, - VKD3D_SM4_OP_GE = 0x1d, - VKD3D_SM4_OP_IADD = 0x1e, - VKD3D_SM4_OP_IF = 0x1f, - VKD3D_SM4_OP_IEQ = 0x20, - VKD3D_SM4_OP_IGE = 0x21, - VKD3D_SM4_OP_ILT = 0x22, - VKD3D_SM4_OP_IMAD = 0x23, - VKD3D_SM4_OP_IMAX = 0x24, - VKD3D_SM4_OP_IMIN = 0x25, - VKD3D_SM4_OP_IMUL = 0x26, - VKD3D_SM4_OP_INE = 0x27, - VKD3D_SM4_OP_INEG = 0x28, - VKD3D_SM4_OP_ISHL = 0x29, - VKD3D_SM4_OP_ISHR = 0x2a, - VKD3D_SM4_OP_ITOF = 0x2b, - VKD3D_SM4_OP_LABEL = 0x2c, - VKD3D_SM4_OP_LD = 0x2d, - VKD3D_SM4_OP_LD2DMS = 0x2e, - VKD3D_SM4_OP_LOG = 0x2f, - VKD3D_SM4_OP_LOOP = 0x30, - VKD3D_SM4_OP_LT = 0x31, - VKD3D_SM4_OP_MAD = 0x32, - VKD3D_SM4_OP_MIN = 0x33, - VKD3D_SM4_OP_MAX = 0x34, - VKD3D_SM4_OP_SHADER_DATA = 0x35, - VKD3D_SM4_OP_MOV = 0x36, - VKD3D_SM4_OP_MOVC = 0x37, - VKD3D_SM4_OP_MUL = 0x38, - VKD3D_SM4_OP_NE = 0x39, - VKD3D_SM4_OP_NOP = 0x3a, - VKD3D_SM4_OP_NOT = 0x3b, - VKD3D_SM4_OP_OR = 0x3c, - VKD3D_SM4_OP_RESINFO = 0x3d, - 
VKD3D_SM4_OP_RET = 0x3e, - VKD3D_SM4_OP_RETC = 0x3f, - VKD3D_SM4_OP_ROUND_NE = 0x40, - VKD3D_SM4_OP_ROUND_NI = 0x41, - VKD3D_SM4_OP_ROUND_PI = 0x42, - VKD3D_SM4_OP_ROUND_Z = 0x43, - VKD3D_SM4_OP_RSQ = 0x44, - VKD3D_SM4_OP_SAMPLE = 0x45, - VKD3D_SM4_OP_SAMPLE_C = 0x46, - VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, - VKD3D_SM4_OP_SAMPLE_LOD = 0x48, - VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, - VKD3D_SM4_OP_SAMPLE_B = 0x4a, - VKD3D_SM4_OP_SQRT = 0x4b, - VKD3D_SM4_OP_SWITCH = 0x4c, - VKD3D_SM4_OP_SINCOS = 0x4d, - VKD3D_SM4_OP_UDIV = 0x4e, - VKD3D_SM4_OP_ULT = 0x4f, - VKD3D_SM4_OP_UGE = 0x50, - VKD3D_SM4_OP_UMUL = 0x51, - VKD3D_SM4_OP_UMAX = 0x53, - VKD3D_SM4_OP_UMIN = 0x54, - VKD3D_SM4_OP_USHR = 0x55, - VKD3D_SM4_OP_UTOF = 0x56, - VKD3D_SM4_OP_XOR = 0x57, - VKD3D_SM4_OP_DCL_RESOURCE = 0x58, - VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, - VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, - VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, - VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, - VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, - VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, - VKD3D_SM4_OP_DCL_INPUT = 0x5f, - VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, - VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, - VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, - VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, - VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, - VKD3D_SM4_OP_DCL_OUTPUT = 0x65, - VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, - VKD3D_SM4_OP_DCL_TEMPS = 0x68, - VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, - VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, - VKD3D_SM4_OP_LOD = 0x6c, - VKD3D_SM4_OP_GATHER4 = 0x6d, - VKD3D_SM4_OP_SAMPLE_POS = 0x6e, - VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, - VKD3D_SM5_OP_HS_DECLS = 0x71, - VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, - VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, - VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, - VKD3D_SM5_OP_EMIT_STREAM = 0x75, - VKD3D_SM5_OP_CUT_STREAM = 0x76, - VKD3D_SM5_OP_FCALL = 0x78, - VKD3D_SM5_OP_BUFINFO = 0x79, - VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, - VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, - VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, - VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, - 
VKD3D_SM5_OP_GATHER4_C = 0x7e, - VKD3D_SM5_OP_GATHER4_PO = 0x7f, - VKD3D_SM5_OP_GATHER4_PO_C = 0x80, - VKD3D_SM5_OP_RCP = 0x81, - VKD3D_SM5_OP_F32TOF16 = 0x82, - VKD3D_SM5_OP_F16TOF32 = 0x83, - VKD3D_SM5_OP_COUNTBITS = 0x86, - VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, - VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, - VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, - VKD3D_SM5_OP_UBFE = 0x8a, - VKD3D_SM5_OP_IBFE = 0x8b, - VKD3D_SM5_OP_BFI = 0x8c, - VKD3D_SM5_OP_BFREV = 0x8d, - VKD3D_SM5_OP_SWAPC = 0x8e, - VKD3D_SM5_OP_DCL_STREAM = 0x8f, - VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, - VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, - VKD3D_SM5_OP_DCL_INTERFACE = 0x92, - VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, - VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, - VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, - VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, - VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, - VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, - VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, - VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, - VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, - VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, - VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, - VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, - VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, - VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, - VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, - VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, - VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, - VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, - VKD3D_SM5_OP_LD_RAW = 0xa5, - VKD3D_SM5_OP_STORE_RAW = 0xa6, - VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, - VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, - VKD3D_SM5_OP_ATOMIC_AND = 0xa9, - VKD3D_SM5_OP_ATOMIC_OR = 0xaa, - VKD3D_SM5_OP_ATOMIC_XOR = 0xab, - VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, - VKD3D_SM5_OP_ATOMIC_IADD = 0xad, - VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, - VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, - VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, - VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, - VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, - VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, - 
VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, - VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, - VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, - VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, - VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, - VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, - VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, - VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, - VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, - VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, - VKD3D_SM5_OP_SYNC = 0xbe, - VKD3D_SM5_OP_DADD = 0xbf, - VKD3D_SM5_OP_DMAX = 0xc0, - VKD3D_SM5_OP_DMIN = 0xc1, - VKD3D_SM5_OP_DMUL = 0xc2, - VKD3D_SM5_OP_DEQ = 0xc3, - VKD3D_SM5_OP_DGE = 0xc4, - VKD3D_SM5_OP_DLT = 0xc5, - VKD3D_SM5_OP_DNE = 0xc6, - VKD3D_SM5_OP_DMOV = 0xc7, - VKD3D_SM5_OP_DMOVC = 0xc8, - VKD3D_SM5_OP_DTOF = 0xc9, - VKD3D_SM5_OP_FTOD = 0xca, - VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, - VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, - VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, - VKD3D_SM5_OP_DDIV = 0xd2, - VKD3D_SM5_OP_DFMA = 0xd3, - VKD3D_SM5_OP_DRCP = 0xd4, - VKD3D_SM5_OP_MSAD = 0xd5, - VKD3D_SM5_OP_DTOI = 0xd6, - VKD3D_SM5_OP_DTOU = 0xd7, - VKD3D_SM5_OP_ITOD = 0xd8, - VKD3D_SM5_OP_UTOD = 0xd9, - VKD3D_SM5_OP_GATHER4_S = 0xdb, - VKD3D_SM5_OP_GATHER4_C_S = 0xdc, - VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, - VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, - VKD3D_SM5_OP_LD_S = 0xdf, - VKD3D_SM5_OP_LD2DMS_S = 0xe0, - VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, - VKD3D_SM5_OP_LD_RAW_S = 0xe2, - VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, - VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, - VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, - VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, - VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, - VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, - VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, - VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, -}; - -enum vkd3d_sm4_instruction_modifier -{ - VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, - VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, - VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, -}; - -enum vkd3d_sm4_register_type -{ - VKD3D_SM4_RT_TEMP = 0x00, - VKD3D_SM4_RT_INPUT = 0x01, - VKD3D_SM4_RT_OUTPUT = 0x02, - VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, 
- VKD3D_SM4_RT_IMMCONST = 0x04, - VKD3D_SM4_RT_IMMCONST64 = 0x05, - VKD3D_SM4_RT_SAMPLER = 0x06, - VKD3D_SM4_RT_RESOURCE = 0x07, - VKD3D_SM4_RT_CONSTBUFFER = 0x08, - VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, - VKD3D_SM4_RT_PRIMID = 0x0b, - VKD3D_SM4_RT_DEPTHOUT = 0x0c, - VKD3D_SM4_RT_NULL = 0x0d, - VKD3D_SM4_RT_RASTERIZER = 0x0e, - VKD3D_SM4_RT_OMASK = 0x0f, - VKD3D_SM5_RT_STREAM = 0x10, - VKD3D_SM5_RT_FUNCTION_BODY = 0x11, - VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, - VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, - VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, - VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, - VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, - VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, - VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, - VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, - VKD3D_SM5_RT_UAV = 0x1e, - VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, - VKD3D_SM5_RT_THREAD_ID = 0x20, - VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, - VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, - VKD3D_SM5_RT_COVERAGE = 0x23, - VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, - VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, - VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, - VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, - VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, -}; - -enum vkd3d_sm4_extended_operand_type -{ - VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, - VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, -}; - -enum vkd3d_sm4_register_modifier -{ - VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, - VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, - VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, - VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, -}; - -enum vkd3d_sm4_register_precision -{ - VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, - VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, - VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, - VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, - VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, -}; - -enum vkd3d_sm4_output_primitive_type -{ - VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, - VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, - VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, -}; - 
-enum vkd3d_sm4_input_primitive_type -{ - VKD3D_SM4_INPUT_PT_POINT = 0x01, - VKD3D_SM4_INPUT_PT_LINE = 0x02, - VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, - VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, - VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, - VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, - VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, - VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, - VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, - VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, - VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, - VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, - VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, - VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, - VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, - VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, - VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, - VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, - VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, - VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, - VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, - VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, - VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, - VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, - VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, - VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, - VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, - VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, - VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, - VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, - VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, - VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, - VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, - VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, - VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, - VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, - VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, -}; - -enum vkd3d_sm4_swizzle_type -{ - VKD3D_SM4_SWIZZLE_NONE = 0x0, - VKD3D_SM4_SWIZZLE_VEC4 = 0x1, - VKD3D_SM4_SWIZZLE_SCALAR = 0x2, -}; - -enum vkd3d_sm4_dimension -{ - VKD3D_SM4_DIMENSION_NONE = 0x0, - VKD3D_SM4_DIMENSION_SCALAR = 0x1, - VKD3D_SM4_DIMENSION_VEC4 = 0x2, -}; - -enum vkd3d_sm4_resource_type -{ - VKD3D_SM4_RESOURCE_BUFFER = 0x1, - VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, - VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, - VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, - VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, - VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, - VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, - 
VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, - VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, - VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, - VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, - VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, -}; - -enum vkd3d_sm4_data_type -{ - VKD3D_SM4_DATA_UNORM = 0x1, - VKD3D_SM4_DATA_SNORM = 0x2, - VKD3D_SM4_DATA_INT = 0x3, - VKD3D_SM4_DATA_UINT = 0x4, - VKD3D_SM4_DATA_FLOAT = 0x5, - VKD3D_SM4_DATA_MIXED = 0x6, - VKD3D_SM4_DATA_DOUBLE = 0x7, - VKD3D_SM4_DATA_CONTINUED = 0x8, - VKD3D_SM4_DATA_UNUSED = 0x9, -}; - -enum vkd3d_sm4_sampler_mode -{ - VKD3D_SM4_SAMPLER_DEFAULT = 0x0, - VKD3D_SM4_SAMPLER_COMPARISON = 0x1, -}; - -enum vkd3d_sm4_shader_data_type -{ - VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, - VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, -}; - -#endif /* __VKD3D_SM4_H */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 53e13735937..bfe5272fd29 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -168,7 +168,7 @@ static void vkd3d_spirv_validate(const struct vkd3d_shader_code *spirv,
#endif /* HAVE_SPIRV_TOOLS */
-static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, +enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index) { switch (sysval) @@ -199,14 +199,9 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enu } }
-static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) -{ - return vkd3d_siv_from_sysval_indexed(sysval, 0); -} - #define VKD3D_SPIRV_VERSION 0x00010000 #define VKD3D_SPIRV_GENERATOR_ID 18 -#define VKD3D_SPIRV_GENERATOR_VERSION 7 +#define VKD3D_SPIRV_GENERATOR_VERSION 8 #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID)
struct vkd3d_spirv_stream @@ -1967,11 +1962,9 @@ struct vkd3d_symbol_register_data uint32_t member_idx; enum vkd3d_shader_component_type component_type; unsigned int write_mask; - uint32_t dcl_mask; unsigned int structure_stride; unsigned int binding_base_idx; bool is_aggregate; /* An aggregate, i.e. a structure or an array. */ - bool is_dynamically_indexed; /* If member_idx is a variable ID instead of a constant. */ };
struct vkd3d_symbol_resource_data @@ -2064,10 +2057,14 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, symbol->type = VKD3D_SYMBOL_REGISTER; memset(&symbol->key, 0, sizeof(symbol->key)); symbol->key.reg.type = reg->type; - if (vkd3d_shader_register_is_input(reg) && reg->idx[1].offset != ~0u) - symbol->key.reg.idx = reg->idx[1].offset; + if (vkd3d_shader_register_is_input(reg) || vkd3d_shader_register_is_output(reg) + || vkd3d_shader_register_is_patch_constant(reg)) + { + symbol->key.reg.idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u; + assert(!reg->idx_count || symbol->key.reg.idx != ~0u); + } else if (reg->type != VKD3DSPR_IMMCONSTBUFFER) - symbol->key.reg.idx = reg->idx[0].offset; + symbol->key.reg.idx = reg->idx_count ? reg->idx[0].offset : ~0u; }
static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, @@ -2080,11 +2077,9 @@ static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, symbol->info.reg.member_idx = 0; symbol->info.reg.component_type = component_type; symbol->info.reg.write_mask = write_mask; - symbol->info.reg.dcl_mask = 0; symbol->info.reg.structure_stride = 0; symbol->info.reg.binding_base_idx = 0; symbol->info.reg.is_aggregate = false; - symbol->info.reg.is_dynamically_indexed = false; }
static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, @@ -2197,11 +2192,7 @@ struct vkd3d_push_constant_buffer_binding
struct vkd3d_shader_phase { - enum vkd3d_shader_opcode type; - unsigned int idx; - unsigned int instance_count; uint32_t function_id; - uint32_t instance_id; size_t function_location; };
@@ -2253,10 +2244,11 @@ struct spirv_compiler struct vkd3d_push_constant_buffer_binding *push_constants; const struct vkd3d_shader_spirv_target_info *spirv_target_info;
+ bool main_block_open; bool after_declarations_section; - const struct vkd3d_shader_signature *input_signature; - const struct vkd3d_shader_signature *output_signature; - const struct vkd3d_shader_signature *patch_constant_signature; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; const struct vkd3d_shader_transform_feedback_info *xfb_info; struct vkd3d_shader_output_info { @@ -2276,9 +2268,10 @@ struct spirv_compiler unsigned int output_control_point_count; bool use_vocp;
- unsigned int shader_phase_count; - struct vkd3d_shader_phase *shader_phases; - size_t shader_phases_size; + enum vkd3d_shader_opcode phase; + bool emit_default_control_point_phase; + struct vkd3d_shader_phase control_point_phase; + struct vkd3d_shader_phase patch_constant_phase;
uint32_t current_spec_constant_id; unsigned int spec_constant_count; @@ -2290,9 +2283,19 @@ struct spirv_compiler struct vkd3d_string_buffer_cache string_buffers; };
-static bool is_control_point_phase(const struct vkd3d_shader_phase *phase) +static bool is_in_default_phase(const struct spirv_compiler *compiler) +{ + return compiler->phase == VKD3DSIH_INVALID; +} + +static bool is_in_control_point_phase(const struct spirv_compiler *compiler) +{ + return compiler->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static bool is_in_fork_or_join_phase(const struct spirv_compiler *compiler) { - return phase && phase->type == VKD3DSIH_HS_CONTROL_POINT_PHASE; + return compiler->phase == VKD3DSIH_HS_FORK_PHASE || compiler->phase == VKD3DSIH_HS_JOIN_PHASE; }
static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler); @@ -2304,13 +2307,37 @@ static const char *spirv_compiler_get_entry_point_name(const struct spirv_compil return info && info->entry_point ? info->entry_point : "main"; }
-struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +static void spirv_compiler_destroy(struct spirv_compiler *compiler) +{ + vkd3d_free(compiler->control_flow_info); + + vkd3d_free(compiler->output_info); + + vkd3d_free(compiler->push_constants); + vkd3d_free(compiler->descriptor_offset_ids); + + vkd3d_spirv_builder_free(&compiler->spirv_builder); + + rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); + + vkd3d_free(compiler->spec_constants); + + vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); + + shader_signature_cleanup(&compiler->input_signature); + shader_signature_cleanup(&compiler->output_signature); + shader_signature_cleanup(&compiler->patch_constant_signature); + + vkd3d_free(compiler); +} + +static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, + struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) { - const struct vkd3d_shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; - const struct vkd3d_shader_signature *output_signature = &shader_desc->output_signature; + const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; + const struct shader_signature *output_signature = &shader_desc->output_signature; const struct vkd3d_shader_interface_info *shader_interface; const struct vkd3d_shader_descriptor_offset_info *offset_info; const struct vkd3d_shader_spirv_target_info *target_info; @@ -2402,9 +2429,12 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *
compiler->shader_type = shader_version->type;
- compiler->input_signature = &shader_desc->input_signature; - compiler->output_signature = &shader_desc->output_signature; - compiler->patch_constant_signature = &shader_desc->patch_constant_signature; + compiler->input_signature = shader_desc->input_signature; + compiler->output_signature = shader_desc->output_signature; + compiler->patch_constant_signature = shader_desc->patch_constant_signature; + memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); + memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); + memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature));
if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { @@ -2437,6 +2467,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *
compiler->scan_descriptor_info = scan_descriptor_info;
+ compiler->phase = VKD3DSIH_INVALID; + vkd3d_string_buffer_cache_init(&compiler->string_buffers);
spirv_compiler_emit_initial_declarations(compiler); @@ -2857,7 +2889,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s { unsigned int idx;
- idx = reg->idx[1].offset != ~0u ? reg->idx[1].offset : reg->idx[0].offset; + idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : 0; switch (reg->type) { case VKD3DSPR_RESOURCE: @@ -2887,12 +2919,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s case VKD3DSPR_DEPTHOUTLE: snprintf(buffer, buffer_size, "oDepth"); break; - case VKD3DSPR_FORKINSTID: - snprintf(buffer, buffer_size, "vForkInstanceId"); - break; - case VKD3DSPR_JOININSTID: - snprintf(buffer, buffer_size, "vJoinInstanceId"); - break; case VKD3DSPR_GSINSTID: snprintf(buffer, buffer_size, "vGSInstanceID"); break; @@ -2965,18 +2991,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler,
static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, - enum vkd3d_shader_component_type component_type, unsigned int component_count, unsigned int array_length) + enum vkd3d_shader_component_type component_type, unsigned int component_count, + const unsigned int *array_lengths, unsigned int length_count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, length_id, ptr_type_id; + unsigned int i;
- if (!array_length) + if (!length_count) return spirv_compiler_emit_variable(compiler, stream, storage_class, component_type, component_count);
type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - length_id = spirv_compiler_get_constant_uint(compiler, array_length); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + for (i = 0; i < length_count; ++i) + { + if (!array_lengths[i]) + continue; + length_id = spirv_compiler_get_constant_uint(compiler, array_lengths[i]); + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + } + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); } @@ -3169,7 +3203,6 @@ struct vkd3d_shader_register_info unsigned int structure_stride; unsigned int binding_base_idx; bool is_aggregate; - bool is_dynamically_indexed; };
static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, @@ -3192,7 +3225,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil register_info->structure_stride = 0; register_info->binding_base_idx = 0; register_info->is_aggregate = false; - register_info->is_dynamically_indexed = false; return true; }
@@ -3214,7 +3246,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil register_info->structure_stride = symbol->info.reg.structure_stride; register_info->binding_base_idx = symbol->info.reg.binding_base_idx; register_info->is_aggregate = symbol->info.reg.is_aggregate; - register_info->is_dynamically_indexed = symbol->info.reg.is_dynamically_indexed;
return true; } @@ -3344,41 +3375,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp } else if (register_info->is_aggregate) { - if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_INCONTROLPOINT) - { - /* Indices for these are swapped compared to the generated SPIR-V. */ - if (reg->idx[1].offset != ~0u) - indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]); - if (reg->idx[0].offset != ~0u) - indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); - } - else - { - struct vkd3d_shader_register_index reg_idx = reg->idx[0]; - - if (reg->idx[1].rel_addr) - FIXME("Relative addressing not implemented.\n"); - - if (register_info->is_dynamically_indexed) - { - indexes[index_count++] = vkd3d_spirv_build_op_load(builder, - vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1), - register_info->member_idx, SpvMemoryAccessMaskNone); - } - else - { - reg_idx.offset = register_info->member_idx; - indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg_idx); - } - } + /* Indices for these are swapped compared to the generated SPIR-V. */ + if (reg->idx_count > 2) + indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]); + if (reg->idx_count > 1) + indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); + if (!index_count) + /* A register sysval which is an array in SPIR-V, e.g. SAMPLEMASK. */ + indexes[index_count++] = spirv_compiler_get_constant_uint(compiler, 0); } else { - if (reg->idx[1].rel_addr || (reg->idx[1].offset == ~0u && reg->idx[0].rel_addr)) + if (reg->idx_count && reg->idx[reg->idx_count - 1].rel_addr) FIXME("Relative addressing not implemented.\n");
/* Handle arrayed registers, e.g. v[3][0]. */ - if (reg->idx[1].offset != ~0u && !register_is_descriptor(reg)) + if (reg->idx_count > 1 && !register_is_descriptor(reg)) indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); }
@@ -4249,35 +4261,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp if ((builtin = get_spirv_builtin_for_register(reg_type))) return builtin;
- if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT)) + if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT + && reg_type != VKD3DSPR_PATCHCONST)) FIXME("Unhandled builtin (register type %#x, sysval %#x).\n", reg_type, sysval); return NULL; }
-static const struct vkd3d_shader_signature_element *vkd3d_find_signature_element_for_reg( - const struct vkd3d_shader_signature *signature, unsigned int *signature_element_index, - unsigned int reg_idx, DWORD write_mask) -{ - unsigned int signature_idx; - - for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) - { - if (signature->elements[signature_idx].register_index == reg_idx - && (signature->elements[signature_idx].mask & write_mask) == write_mask) - { - if (signature_element_index) - *signature_element_index = signature_idx; - return &signature->elements[signature_idx]; - } - } - - FIXME("Could not find shader signature element (register %u, write mask %#x).\n", - reg_idx, write_mask); - if (signature_element_index) - *signature_element_index = ~0u; - return NULL; -} - static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler) { struct vkd3d_shader_register r; @@ -4288,6 +4277,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler r.type = VKD3DSPR_OUTPOINTID; r.idx[0].offset = ~0u; r.idx[1].offset = ~0u; + r.idx_count = 0; return spirv_compiler_get_register_id(compiler, &r); }
@@ -4302,7 +4292,7 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co }
static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compiler, - uint32_t id, const struct vkd3d_shader_phase *phase, const char *suffix) + uint32_t id, const char *suffix) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const char *name; @@ -4310,7 +4300,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile if (!suffix) suffix = "";
- switch (phase->type) + switch (compiler->phase) { case VKD3DSIH_HS_CONTROL_POINT_PHASE: name = "control"; @@ -4322,62 +4312,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile name = "join"; break; default: - ERR("Invalid phase type %#x.\n", phase->type); + ERR("Invalid phase type %#x.\n", compiler->phase); return; } - vkd3d_spirv_build_op_name(builder, id, "%s%u%s", name, phase->idx, suffix); -} - -static void spirv_compiler_begin_shader_phase(struct spirv_compiler *compiler, - struct vkd3d_shader_phase *phase) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t void_id, function_type_id; - unsigned int param_count; - uint32_t param_type_id; - - if (phase->instance_count) - { - param_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - param_count = 1; - } - else - { - param_count = 0; - } - - phase->function_id = vkd3d_spirv_alloc_id(builder); - - void_id = vkd3d_spirv_get_op_type_void(builder); - function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, &param_type_id, param_count); - vkd3d_spirv_build_op_function(builder, void_id, phase->function_id, - SpvFunctionControlMaskNone, function_type_id); - - if (phase->instance_count) - phase->instance_id = vkd3d_spirv_build_op_function_parameter(builder, param_type_id); - - vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); - phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream); - - spirv_compiler_emit_shader_phase_name(compiler, phase->function_id, phase, NULL); + vkd3d_spirv_build_op_name(builder, id, "%s%s", name, suffix); }
static const struct vkd3d_shader_phase *spirv_compiler_get_current_shader_phase( struct spirv_compiler *compiler) { - struct vkd3d_shader_phase *phase; - - if (!compiler->shader_phase_count) + if (is_in_default_phase(compiler)) return NULL;
- phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; - if (!phase->function_id) - spirv_compiler_begin_shader_phase(compiler, phase); - return phase; + return is_in_control_point_phase(compiler) ? &compiler->control_point_phase : &compiler->patch_constant_phase; }
static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, - uint32_t id, unsigned int component_count, const struct vkd3d_shader_signature_element *signature_element) + uint32_t id, unsigned int component_count, const struct signature_element *signature_element) { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; const struct vkd3d_shader_transform_feedback_element *xfb_element; @@ -4436,17 +4387,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationOffset, offset); }
-static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler, - const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) +static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *compiler, + const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, const unsigned int *array_sizes, + unsigned int size_count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int sizes[2]; uint32_t id;
- array_size = max(array_size, builtin->spirv_array_size); + assert(size_count <= ARRAY_SIZE(sizes)); + memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); + array_sizes = sizes; + sizes[0] = max(sizes[0], builtin->spirv_array_size);
- id = spirv_compiler_emit_array_variable(compiler, - &builder->global_stream, storage_class, - builtin->component_type, builtin->component_count, array_size); + id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class, + builtin->component_type, builtin->component_count, array_sizes, size_count); vkd3d_spirv_add_iface_variable(builder, id); spirv_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin);
@@ -4458,54 +4413,45 @@ static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *comp return id; }
-static bool needs_private_io_variable(const struct vkd3d_shader_signature *signature, - unsigned int reg_idx, const struct vkd3d_spirv_builtin *builtin, - unsigned int *component_count, unsigned int *out_write_mask) +static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler, + const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) { - unsigned int write_mask = 0; - bool have_sysval = false; - unsigned int i, count; - - /* Always use private variables for arrayed builtins. These are generally - * scalars on the D3D side, so would need extra array indices when - * accessing them. It may be feasible to insert those indices at the point - * where the builtins are used, but it's not clear it's worth the effort. */ - if (builtin && (builtin->spirv_array_size || builtin->fixup_pfn)) - return true; - - if (*component_count == VKD3D_VEC4_SIZE) - return false; - - for (i = 0, count = 0; i < signature->element_count; ++i) - { - const struct vkd3d_shader_signature_element *current = &signature->elements[i]; + return spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, &array_size, 1); +}
- if (current->register_index != reg_idx) - continue; +static bool needs_private_io_variable(const struct vkd3d_spirv_builtin *builtin) +{ + return builtin && builtin->fixup_pfn; +}
- write_mask |= current->mask; - ++count; +static unsigned int shader_signature_next_location(const struct shader_signature *signature) +{ + unsigned int i, max_row;
- if (current->sysval_semantic) - have_sysval = true; - } + if (!signature) + return 0;
- if (count == 1) - return false; + for (i = 0, max_row = 0; i < signature->element_count; ++i) + max_row = max(max_row, signature->elements[i].register_index + signature->elements[i].register_count); + return max_row; +}
- if (builtin || have_sysval) - return true; +static unsigned int shader_register_get_io_indices(const struct vkd3d_shader_register *reg, + unsigned int *array_sizes) +{ + unsigned int i, element_idx;
- if (!vkd3d_bitmask_is_contiguous(write_mask)) + array_sizes[0] = 0; + array_sizes[1] = 0; + element_idx = reg->idx[0].offset; + for (i = 1; i < reg->idx_count; ++i) { - FIXME("Write mask %#x is non-contiguous.\n", write_mask); - return true; + array_sizes[1] = array_sizes[0]; + array_sizes[0] = element_idx; + element_idx = reg->idx[i].offset; }
- assert(vkd3d_write_mask_component_count(write_mask) >= *component_count); - *component_count = vkd3d_write_mask_component_count(write_mask); - *out_write_mask = write_mask; - return false; + return element_idx; }
static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, @@ -4513,50 +4459,35 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, enum vkd3d_shader_interpolation_mode interpolation_mode) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature_element *signature_element; - const struct vkd3d_shader_signature *shader_signature; const struct vkd3d_shader_register *reg = &dst->reg; unsigned int component_idx, input_component_count; + const struct signature_element *signature_element; + const struct shader_signature *shader_signature; enum vkd3d_shader_component_type component_type; uint32_t type_id, ptr_type_id, float_type_id; const struct vkd3d_spirv_builtin *builtin; + unsigned int write_mask, reg_write_mask; struct vkd3d_symbol *symbol = NULL; uint32_t val_id, input_id, var_id; struct vkd3d_symbol reg_symbol; - struct vkd3d_symbol tmp_symbol; SpvStorageClass storage_class; struct rb_entry *entry = NULL; bool use_private_var = false; - unsigned int write_mask; - unsigned int array_size; - unsigned int reg_idx; + unsigned int array_sizes[2]; + unsigned int element_idx; uint32_t i, index;
- assert(!reg->idx[0].rel_addr); - assert(!reg->idx[1].rel_addr); - - if (reg->idx[1].offset != ~0u) - { - array_size = reg->idx[0].offset; - reg_idx = reg->idx[1].offset; - } - else - { - array_size = 0; - reg_idx = reg->idx[0].offset; - } + assert(!reg->idx_count || !reg->idx[0].rel_addr); + assert(reg->idx_count < 2 || !reg->idx[1].rel_addr);
shader_signature = reg->type == VKD3DSPR_PATCHCONST - ? compiler->patch_constant_signature : compiler->input_signature; + ? &compiler->patch_constant_signature : &compiler->input_signature;
- if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, - NULL, reg_idx, dst->write_mask))) - { - FIXME("No signature element for shader input, ignoring shader input.\n"); - return 0; - } + element_idx = shader_register_get_io_indices(reg, array_sizes); + signature_element = &shader_signature->elements[element_idx];
- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic) + if ((compiler->shader_type == VKD3D_SHADER_TYPE_HULL || compiler->shader_type == VKD3D_SHADER_TYPE_GEOMETRY) + && !sysval && signature_element->sysval_semantic) sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic);
builtin = get_spirv_builtin_for_sysval(compiler, sysval); @@ -4576,12 +4507,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, component_idx = vkd3d_write_mask_get_component_idx(signature_element->mask); }
- if (needs_private_io_variable(shader_signature, reg_idx, builtin, &input_component_count, &write_mask) - && (compiler->shader_type != VKD3D_SHADER_TYPE_HULL - || (reg->type != VKD3DSPR_INCONTROLPOINT && reg->type != VKD3DSPR_PATCHCONST))) + if (needs_private_io_variable(builtin)) + { use_private_var = true; + reg_write_mask = write_mask; + } else + { component_idx = vkd3d_write_mask_get_component_idx(write_mask); + reg_write_mask = write_mask >> component_idx; + }
storage_class = SpvStorageClassInput;
@@ -4589,111 +4524,68 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
if ((entry = rb_get(&compiler->symbol_table, &reg_symbol))) { + /* Except for vicp there should be one declaration per signature element. Sources of + * duplicate declarations are: a single register split into multiple declarations having + * different components, which should have been merged, and declarations in one phase + * being repeated in another (i.e. vcp/vocp), which should have been deleted. */ + if (reg->type != VKD3DSPR_INPUT || !is_in_fork_or_join_phase(compiler)) + FIXME("Duplicate input definition found.\n"); symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - input_id = symbol->id; - } - else if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL - && (reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST)) - { - /* Input/output registers from one phase can be used as inputs in - * subsequent phases. Specifically: - * - * - Control phase inputs are available as "vicp" in fork and join - * phases. - * - Control phase outputs are available as "vocp" in fork and join - * phases. - * - Fork phase patch constants are available as "vpc" in join - * phases. - * - * We handle "vicp" and "vpc" here by creating aliases to the shader's - * global inputs and outputs. We handle "vocp" in - * spirv_compiler_leave_shader_phase(). */ - - tmp_symbol = reg_symbol; - if (reg->type == VKD3DSPR_PATCHCONST) - tmp_symbol.key.reg.type = VKD3DSPR_OUTPUT; - else - tmp_symbol.key.reg.type = VKD3DSPR_INPUT; - - if ((entry = rb_get(&compiler->symbol_table, &tmp_symbol))) - { - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - tmp_symbol = *symbol; - tmp_symbol.key.reg.type = reg->type; - spirv_compiler_put_symbol(compiler, &tmp_symbol); - - input_id = symbol->id; - } - else - { - if (reg->type == VKD3DSPR_PATCHCONST) - ERR("Patch constant register %u was not declared in a previous phase.\n", reg_idx); - else - ERR("Input control point register %u was not declared in a previous phase.\n", reg_idx); - } + return symbol->id; }
- if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) + if (builtin) { - if (builtin) - { - input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); - if (reg->type == VKD3DSPR_PATCHCONST) - vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); - } - else - { - unsigned int location = reg_idx; - - input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, component_type, input_component_count, array_size); - vkd3d_spirv_add_iface_variable(builder, input_id); - if (reg->type == VKD3DSPR_PATCHCONST) - { - vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); - location += compiler->input_signature->element_count; - } - vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); - if (component_idx) - vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx); - - spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode); - } + input_id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2); + if (reg->type == VKD3DSPR_PATCHCONST) + vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); } - - if (!symbol) + else { - var_id = input_id; - if (use_private_var) + unsigned int location = signature_element->register_index; + + input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, input_component_count, array_sizes, 2); + vkd3d_spirv_add_iface_variable(builder, input_id); + if (reg->type == VKD3DSPR_PATCHCONST) { - storage_class = SpvStorageClassPrivate; - var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_size); + vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); + location += 
shader_signature_next_location(&compiler->input_signature); } + vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); + if (component_idx) + vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx);
- vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_var ? VKD3DSP_WRITEMASK_ALL : write_mask); - reg_symbol.info.reg.dcl_mask |= write_mask; - spirv_compiler_put_symbol(compiler, &reg_symbol); - - spirv_compiler_emit_register_debug_name(builder, var_id, reg); + spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode); } - else + + var_id = input_id; + if (use_private_var) { - symbol->info.reg.dcl_mask |= write_mask; + storage_class = SpvStorageClassPrivate; + var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_sizes, 2); }
+ vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, + use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_var ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); + reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; + assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); + spirv_compiler_put_symbol(compiler, &reg_symbol); + + spirv_compiler_emit_register_debug_name(builder, var_id, reg); + if (use_private_var) { type_id = vkd3d_spirv_get_type_id(builder, component_type, input_component_count); - for (i = 0; i < max(array_size, 1); ++i) + for (i = 0; i < max(array_sizes[0], 1); ++i) { struct vkd3d_shader_register dst_reg = *reg; dst_reg.data_type = VKD3D_DATA_FLOAT;
val_id = input_id; - if (array_size) + if (array_sizes[0]) { ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); index = spirv_compiler_get_constant_uint(compiler, i); @@ -4708,7 +4600,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); index = spirv_compiler_get_constant_uint(compiler, builtin->member_idx); val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index); - dst_reg.idx[0].offset = reg_idx + i; + dst_reg.idx[0].offset = element_idx + i; } val_id = vkd3d_spirv_build_op_load(builder, type_id, val_id, SpvMemoryAccessMaskNone);
@@ -4743,9 +4635,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t input_id;
- assert(!reg->idx[0].rel_addr); - assert(!reg->idx[1].rel_addr); - assert(reg->idx[1].offset == ~0u); + assert(!reg->idx_count || !reg->idx[0].rel_addr); + assert(reg->idx_count < 2);
if (!(builtin = get_spirv_builtin_for_register(reg->type))) { @@ -4763,19 +4654,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); vkd3d_symbol_set_register_info(&reg_symbol, input_id, SpvStorageClassInput, builtin->component_type, write_mask); - reg_symbol.info.reg.dcl_mask = write_mask; reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; spirv_compiler_put_symbol(compiler, &reg_symbol); spirv_compiler_emit_register_debug_name(builder, input_id, reg); }
static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compiler, - const struct vkd3d_shader_phase *phase, const struct vkd3d_shader_dst_param *dst) + const struct vkd3d_shader_dst_param *dst) { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_register *reg = &dst->reg; - struct vkd3d_symbol reg_symbol; - uint32_t val_id;
switch (reg->type) { @@ -4787,10 +4674,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil case VKD3DSPR_PRIMID: spirv_compiler_emit_input_register(compiler, dst); return; - case VKD3DSPR_FORKINSTID: - case VKD3DSPR_JOININSTID: - val_id = phase->instance_id; - break; case VKD3DSPR_OUTPOINTID: /* Emitted in spirv_compiler_emit_initial_declarations(). */ case VKD3DSPR_OUTCONTROLPOINT: /* See spirv_compiler_leave_shader_phase(). */ return; @@ -4798,22 +4681,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil FIXME("Unhandled shader phase input register %#x.\n", reg->type); return; } - - vkd3d_symbol_make_register(&reg_symbol, reg); - vkd3d_symbol_set_register_info(&reg_symbol, val_id, - SpvStorageClassMax /* Intermediate value */, - VKD3D_SHADER_COMPONENT_UINT, VKD3DSP_WRITEMASK_0); - spirv_compiler_put_symbol(compiler, &reg_symbol); - spirv_compiler_emit_register_debug_name(builder, val_id, reg); -} - -static unsigned int spirv_compiler_get_output_variable_index( - struct spirv_compiler *compiler, unsigned int register_idx) -{ - if (register_idx == ~0u) /* oDepth */ - return ARRAY_SIZE(compiler->private_output_variable) - 1; - assert(register_idx < ARRAY_SIZE(compiler->private_output_variable) - 1); - return register_idx; }
static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, @@ -4835,8 +4702,7 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; }
-static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signature_element *e, - uint32_t *mask) +static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) { if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) { @@ -4847,38 +4713,10 @@ static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signa *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); }
-static uint32_t calculate_sysval_array_mask(struct spirv_compiler *compiler, - const struct vkd3d_shader_signature *signature, enum vkd3d_shader_input_sysval_semantic sysval) -{ - const struct vkd3d_shader_signature_element *e; - const struct vkd3d_spirv_builtin *sig_builtin; - const struct vkd3d_spirv_builtin *builtin; - uint32_t signature_idx, mask = 0; - - if (!(builtin = get_spirv_builtin_for_sysval(compiler, sysval))) - { - FIXME("Unhandled sysval %#x.\n", sysval); - return 0; - } - - for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) - { - e = &signature->elements[signature_idx]; - - sig_builtin = get_spirv_builtin_for_sysval(compiler, - vkd3d_siv_from_sysval_indexed(e->sysval_semantic, e->semantic_index)); - - if (sig_builtin && sig_builtin->spirv_builtin == builtin->spirv_builtin) - mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * sig_builtin->member_idx); - } - - return mask; -} - /* Emits arrayed SPIR-V built-in variables. */ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) { - const struct vkd3d_shader_signature *output_signature = compiler->output_signature; + const struct shader_signature *output_signature = &compiler->output_signature; uint32_t clip_distance_mask = 0, clip_distance_id = 0; uint32_t cull_distance_mask = 0, cull_distance_id = 0; const struct vkd3d_spirv_builtin *builtin; @@ -4886,7 +4724,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *
for (i = 0; i < output_signature->element_count; ++i) { - const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; + const struct signature_element *e = &output_signature->elements[i];
switch (e->sysval_semantic) { @@ -4921,7 +4759,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *
for (i = 0; i < output_signature->element_count; ++i) { - const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; + const struct signature_element *e = &output_signature->elements[i];
switch (e->sysval_semantic) { @@ -4953,9 +4791,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t output_id;
- assert(!reg->idx[0].rel_addr); - assert(!reg->idx[1].rel_addr); - assert(reg->idx[1].offset == ~0u); + assert(!reg->idx_count || !reg->idx[0].rel_addr); + assert(reg->idx_count < 2);
if (!(builtin = get_spirv_builtin_for_register(reg->type))) { @@ -4969,7 +4806,6 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); vkd3d_symbol_set_register_info(&reg_symbol, output_id, SpvStorageClassOutput, builtin->component_type, write_mask); - reg_symbol.info.reg.dcl_mask = write_mask; reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; spirv_compiler_put_symbol(compiler, &reg_symbol); spirv_compiler_emit_register_execution_mode(compiler, reg); @@ -4977,7 +4813,7 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, }
static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, - const struct vkd3d_shader_phase *phase, const struct vkd3d_spirv_builtin *builtin) + const struct vkd3d_spirv_builtin *builtin) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t *variable_id, id; @@ -4993,7 +4829,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c return *variable_id;
id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); - if (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE) + if (is_in_fork_or_join_phase(compiler)) vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0);
if (variable_id) @@ -5005,44 +4841,34 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_input_sysval_semantic sysval) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature_element *signature_element; - const struct vkd3d_shader_signature *shader_signature; const struct vkd3d_shader_register *reg = &dst->reg; unsigned int component_idx, output_component_count; + const struct signature_element *signature_element; enum vkd3d_shader_component_type component_type; + const struct shader_signature *shader_signature; const struct vkd3d_spirv_builtin *builtin; - const struct vkd3d_shader_phase *phase; - struct vkd3d_symbol *symbol = NULL; + unsigned int write_mask, reg_write_mask; bool use_private_variable = false; struct vkd3d_symbol reg_symbol; SpvStorageClass storage_class; - struct rb_entry *entry = NULL; - unsigned int signature_idx; - unsigned int write_mask; - unsigned int array_size; + unsigned int array_sizes[2]; + unsigned int element_idx; bool is_patch_constant; uint32_t id, var_id;
- phase = spirv_compiler_get_current_shader_phase(compiler); - is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); + is_patch_constant = is_in_fork_or_join_phase(compiler);
- shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; + shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature;
- array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0; - - if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, - &signature_idx, reg->idx[0].offset, dst->write_mask))) - { - FIXME("No signature element for shader output, ignoring shader output.\n"); - return; - } + element_idx = shader_register_get_io_indices(reg, array_sizes); + signature_element = &shader_signature->elements[element_idx];
builtin = vkd3d_get_spirv_builtin(compiler, dst->reg.type, sysval);
write_mask = signature_element->mask;
- component_idx = vkd3d_write_mask_get_component_idx(dst->write_mask); - output_component_count = vkd3d_write_mask_component_count(signature_element->mask); + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + output_component_count = vkd3d_write_mask_component_count(write_mask); if (builtin) { component_type = builtin->component_type; @@ -5058,128 +4884,103 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, storage_class = SpvStorageClassOutput;
if (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE - || needs_private_io_variable(shader_signature, signature_element->register_index, - builtin, &output_component_count, &write_mask) - || is_patch_constant) + || (compiler->output_info[element_idx].id && compiler->output_info[element_idx].array_element_mask) + || needs_private_io_variable(builtin)) + { use_private_variable = true; + reg_write_mask = write_mask; + } else + { component_idx = vkd3d_write_mask_get_component_idx(write_mask); + reg_write_mask = write_mask >> component_idx; + }
vkd3d_symbol_make_register(&reg_symbol, reg);
- if ((entry = rb_get(&compiler->symbol_table, &reg_symbol))) + if (rb_get(&compiler->symbol_table, &reg_symbol)) { - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - id = symbol->id; + /* See spirv_compiler_emit_input() for possible causes. */ + FIXME("Duplicate output definition found.\n"); + return; }
- if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) + if (compiler->output_info[element_idx].id) { - if (compiler->output_info[signature_idx].id) - { - id = compiler->output_info[signature_idx].id; - if (compiler->output_info[signature_idx].array_element_mask) - use_private_variable = true; - } - else if (builtin) - { - if (phase) - id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, phase, builtin); - else - id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); - - if (builtin->spirv_array_size) - compiler->output_info[signature_idx].array_element_mask = - calculate_sysval_array_mask(compiler, shader_signature, sysval); - - spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); - } + id = compiler->output_info[element_idx].id; + } + else if (builtin) + { + if (spirv_compiler_get_current_shader_phase(compiler)) + id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, builtin); else - { - unsigned int location = reg->idx[0].offset; - - if (is_patch_constant) - location += compiler->output_signature->element_count; - - id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, component_type, output_component_count, array_size); - vkd3d_spirv_add_iface_variable(builder, id); - - if (is_dual_source_blending(compiler) && reg->idx[0].offset < 2) - { - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, reg->idx[0].offset); - } - else - { - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); - } - - if (component_idx) - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); - } + id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2);
- if (is_patch_constant) - vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); - - spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); - - compiler->output_info[signature_idx].id = id; - compiler->output_info[signature_idx].component_type = component_type; + spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); } - - if (!symbol) + else { - var_id = id; - if (use_private_variable) - storage_class = SpvStorageClassPrivate; + unsigned int location = signature_element->register_index; + if (is_patch_constant) - var_id = compiler->hs.patch_constants_id; - else if (use_private_variable) - var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, - storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + location += shader_signature_next_location(&compiler->output_signature);
- vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask); - reg_symbol.info.reg.is_aggregate = use_private_variable ? is_patch_constant : array_size; - if (!use_private_variable && is_control_point_phase(phase)) + id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, output_component_count, array_sizes, 2); + vkd3d_spirv_add_iface_variable(builder, id); + + if (is_dual_source_blending(compiler) && signature_element->register_index < 2) { - reg_symbol.info.reg.member_idx = spirv_compiler_get_invocation_id(compiler); - reg_symbol.info.reg.is_dynamically_indexed = true; + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); } - else if (is_patch_constant) + else { - reg_symbol.info.reg.member_idx = reg->idx[0].offset; + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); } - reg_symbol.info.reg.dcl_mask = write_mask; - - spirv_compiler_put_symbol(compiler, &reg_symbol);
- if (!is_patch_constant) - spirv_compiler_emit_register_debug_name(builder, var_id, reg); + if (component_idx) + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); } - else + + if (is_patch_constant) + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); + + spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); + + compiler->output_info[element_idx].id = id; + compiler->output_info[element_idx].component_type = component_type; + + var_id = id; + if (use_private_variable) { - symbol->info.reg.dcl_mask |= write_mask; - var_id = symbol->id; + storage_class = SpvStorageClassPrivate; + var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, + storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); }
+ vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, + use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); + reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; + assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); + + spirv_compiler_put_symbol(compiler, &reg_symbol); + + if (!is_patch_constant) + spirv_compiler_emit_register_debug_name(builder, var_id, reg); + if (use_private_variable) { - unsigned int idx = spirv_compiler_get_output_variable_index(compiler, reg->idx[0].offset); - compiler->private_output_variable[idx] = var_id; - compiler->private_output_variable_write_mask[idx] |= dst->write_mask; - if (is_patch_constant) - compiler->private_output_variable_array_idx[idx] = spirv_compiler_get_constant_uint( - compiler, reg->idx[0].offset); + compiler->private_output_variable[element_idx] = var_id; + compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask; if (!compiler->epilogue_function_id) compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); } }
static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *compiler, - const struct vkd3d_shader_signature_element *e) + const struct signature_element *e) { enum vkd3d_shader_input_sysval_semantic sysval; const struct vkd3d_spirv_builtin *builtin; @@ -5198,14 +4999,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *com }
static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, - const struct vkd3d_shader_signature *signature, const struct vkd3d_shader_signature_element *output, + const struct shader_signature *signature, const struct signature_element *output, const struct vkd3d_shader_output_info *output_info, uint32_t output_index_id, uint32_t val_id, unsigned int write_mask) { unsigned int dst_write_mask, use_mask, uninit_mask, swizzle, mask; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; - const struct vkd3d_shader_signature_element *element; + const struct signature_element *element; unsigned int i, index, array_idx; uint32_t output_id;
@@ -5224,6 +5025,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi use_mask |= element->used_mask; } } + index = vkd3d_write_mask_get_component_idx(output->mask); + dst_write_mask >>= index; + use_mask >>= index; write_mask &= dst_write_mask;
if (!write_mask) @@ -5294,22 +5098,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature *signature; - const struct vkd3d_shader_phase *phase; + const struct shader_signature *signature; uint32_t output_index_id = 0; bool is_patch_constant; unsigned int i, count; - DWORD variable_idx;
STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_id)); STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_type_id)); STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_array_idx)); STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_write_mask));
- phase = spirv_compiler_get_current_shader_phase(compiler); - is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); + is_patch_constant = is_in_fork_or_join_phase(compiler);
- signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; + signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature;
function_id = compiler->epilogue_function_id;
@@ -5340,7 +5141,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * param_id[i] = vkd3d_spirv_build_op_load(builder, type_id, param_id[i], SpvMemoryAccessMaskNone); }
- if (is_control_point_phase(phase)) + if (is_in_control_point_phase(compiler)) output_index_id = spirv_compiler_emit_load_invocation_id(compiler);
for (i = 0; i < signature->element_count; ++i) @@ -5348,14 +5149,12 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * if (!compiler->output_info[i].id) continue;
- variable_idx = spirv_compiler_get_output_variable_index(compiler, - signature->elements[i].register_index); - if (!param_id[variable_idx]) + if (!param_id[i]) continue;
spirv_compiler_emit_store_shader_output(compiler, signature, &signature->elements[i], &compiler->output_info[i], output_index_id, - param_id[variable_idx], compiler->private_output_variable_write_mask[variable_idx]); + param_id[i], compiler->private_output_variable_write_mask[i]); }
vkd3d_spirv_build_op_return(&compiler->spirv_builder); @@ -5375,28 +5174,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *comp dst.reg.type = VKD3DSPR_OUTPOINTID; dst.reg.idx[0].offset = ~0u; dst.reg.idx[1].offset = ~0u; + dst.reg.idx_count = 0; dst.write_mask = VKD3DSP_WRITEMASK_0; spirv_compiler_emit_input_register(compiler, &dst); }
-static void spirv_compiler_emit_hull_shader_patch_constants(struct spirv_compiler *compiler) -{ - const struct vkd3d_shader_signature *signature = compiler->patch_constant_signature; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t register_count = 0; - unsigned int signature_idx; - - for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) - register_count = max(register_count, signature->elements[signature_idx].register_index + 1); - - if (!register_count) - return; - - compiler->hs.patch_constants_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, register_count); - vkd3d_spirv_build_op_name(builder, compiler->hs.patch_constants_id, "opc"); -} - static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; @@ -5410,7 +5192,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp case VKD3D_SHADER_TYPE_HULL: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); spirv_compiler_emit_hull_shader_builtins(compiler); - spirv_compiler_emit_hull_shader_patch_constants(compiler); break; case VKD3D_SHADER_TYPE_DOMAIN: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); @@ -5439,8 +5220,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) { vkd3d_spirv_builder_begin_main_function(builder); - - spirv_compiler_emit_shader_signature_outputs(compiler); + compiler->main_block_open = true; } }
@@ -5522,12 +5302,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil reg.type = VKD3DSPR_IDXTEMP; reg.idx[0].offset = temp->register_idx; reg.idx[1].offset = ~0u; + reg.idx_count = 1;
function_location = spirv_compiler_get_current_function_location(compiler); vkd3d_spirv_begin_function_stream_insertion(builder, function_location);
id = spirv_compiler_emit_array_variable(compiler, &builder->function_stream, - SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, temp->register_size); + SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, &temp->register_size, 1);
spirv_compiler_emit_register_debug_name(builder, id, &reg);
@@ -6097,6 +5878,7 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; + enum vkd3d_shader_resource_type resource_type = semantic->resource_type; uint32_t flags = instruction->flags;
/* We don't distinguish between APPEND and COUNTER UAVs. */ @@ -6104,8 +5886,13 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, if (flags) FIXME("Unhandled UAV flags %#x.\n", flags);
+ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; + spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, - semantic->resource_type, semantic->resource_data_type[0], 0, false); + resource_type, semantic->resource_data_type[0], 0, false); }
static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, @@ -6185,10 +5972,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; - const struct vkd3d_shader_phase *phase;
- if ((phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_emit_shader_phase_input(compiler, phase, dst); + if (spirv_compiler_get_current_shader_phase(compiler)) + spirv_compiler_emit_shader_phase_input(compiler, dst); else if (vkd3d_shader_register_is_input(&dst->reg) || dst->reg.type == VKD3DSPR_PATCHCONST) spirv_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE); else @@ -6224,7 +6010,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, { const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst;
- if (vkd3d_shader_register_is_output(&dst->reg)) + if (vkd3d_shader_register_is_output(&dst->reg) + || (is_in_fork_or_join_phase(compiler) && vkd3d_shader_register_is_patch_constant(&dst->reg))) spirv_compiler_emit_output(compiler, dst, VKD3D_SIV_NONE); else spirv_compiler_emit_output_register(compiler, dst); @@ -6242,64 +6029,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler, spirv_compiler_emit_output(compiler, dst, sysval); }
-static bool spirv_compiler_check_index_range(struct spirv_compiler *compiler, - const struct vkd3d_shader_index_range *range) -{ - const struct vkd3d_shader_register *reg = &range->dst.reg; - struct vkd3d_shader_register_info reg_info; - struct vkd3d_shader_register current_reg; - struct vkd3d_symbol reg_symbol; - unsigned int i; - uint32_t id; - - current_reg = *reg; - vkd3d_symbol_make_register(&reg_symbol, &current_reg); - if (!spirv_compiler_get_register_info(compiler, &current_reg, &reg_info)) - { - ERR("Failed to get register info.\n"); - return false; - } - - /* FIXME: We should check if it's an array. */ - if (!reg_info.is_aggregate) - { - FIXME("Unhandled register %#x.\n", reg->type); - return false; - } - id = reg_info.id; - - for (i = reg->idx[0].offset; i < reg->idx[0].offset + range->register_count; ++i) - { - current_reg.idx[0].offset = i; - vkd3d_symbol_make_register(&reg_symbol, &current_reg); - - if (range->dst.write_mask != reg_info.write_mask - || vkd3d_write_mask_component_count(reg_info.write_mask) != 1) - { - FIXME("Unhandled index range write mask %#x (%#x).\n", - range->dst.write_mask, reg_info.write_mask); - return false; - } - - if (reg_info.id != id) - { - FIXME("Unhandled index range %#x, %u.\n", reg->type, i); - return false; - } - } - - return true; -} - -static void spirv_compiler_emit_dcl_index_range(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range; - - if (!spirv_compiler_check_index_range(compiler, range)) - FIXME("Ignoring dcl_index_range %#x %u.\n", range->dst.reg.type, range->register_count); -} - static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -6495,157 +6224,83 @@ static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); }
-static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler, - const struct vkd3d_shader_phase *phase) +static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); + +static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) { - const struct vkd3d_shader_signature *signature = compiler->output_signature; + const struct shader_signature *signature = &compiler->output_signature; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_symbol reg_symbol, *symbol; - struct vkd3d_shader_register reg; - struct rb_entry *entry; - unsigned int i; + + if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) + spirv_compiler_emit_default_control_point_phase(compiler);
vkd3d_spirv_build_op_function_end(builder);
compiler->temp_id = 0; compiler->temp_count = 0;
- /* - * vocp inputs in fork and join shader phases are outputs of the control - * point phase. Reinsert symbols for vocp registers while leaving the - * control point phase. - */ - if (is_control_point_phase(phase)) + if (is_in_control_point_phase(compiler)) { if (compiler->epilogue_function_id) { - spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, phase, "_epilogue"); + spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, "_epilogue"); spirv_compiler_emit_shader_epilogue_function(compiler); }
- memset(&reg, 0, sizeof(reg)); - reg.idx[1].offset = ~0u; - /* Fork and join phases share output registers (patch constants). * Control point phase has separate output registers. */ memset(compiler->output_info, 0, signature->element_count * sizeof(*compiler->output_info)); memset(compiler->private_output_variable, 0, sizeof(compiler->private_output_variable)); memset(compiler->private_output_variable_array_idx, 0, sizeof(compiler->private_output_variable_array_idx)); memset(compiler->private_output_variable_write_mask, 0, sizeof(compiler->private_output_variable_write_mask)); - - for (i = 0; i < signature->element_count; ++i) - { - const struct vkd3d_shader_signature_element *e = &signature->elements[i]; - - reg.type = VKD3DSPR_OUTPUT; - reg.idx[0].offset = e->register_index; - vkd3d_symbol_make_register(&reg_symbol, &reg); - if ((entry = rb_get(&compiler->symbol_table, &reg_symbol))) - { - rb_remove(&compiler->symbol_table, entry); - - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - - reg.type = VKD3DSPR_OUTCONTROLPOINT; - reg.idx[1].offset = reg.idx[0].offset; - reg.idx[0].offset = compiler->output_control_point_count; - vkd3d_symbol_make_register(symbol, &reg); - symbol->info.reg.is_aggregate = false; - - if (rb_put(&compiler->symbol_table, symbol, entry) == -1) - { - ERR("Failed to insert vocp symbol entry (%s).\n", debug_vkd3d_symbol(symbol)); - vkd3d_symbol_free(entry, NULL); - } - } - } - } - - if (phase->instance_count) - { - memset(&reg, 0, sizeof(reg)); - reg.type = phase->type == VKD3DSIH_HS_FORK_PHASE ? VKD3DSPR_FORKINSTID : VKD3DSPR_JOININSTID; - reg.idx[0].offset = ~0u; - reg.idx[1].offset = ~0u; - vkd3d_symbol_make_register(&reg_symbol, &reg); - if ((entry = rb_get(&compiler->symbol_table, &reg_symbol))) - { - rb_remove(&compiler->symbol_table, entry); - vkd3d_symbol_free(entry, NULL); - } } }
static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - const struct vkd3d_shader_phase *previous_phase; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t function_id, void_id, function_type_id; struct vkd3d_shader_phase *phase;
- if ((previous_phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_leave_shader_phase(compiler, previous_phase); + assert(compiler->phase != instruction->handler_idx);
- if (!vkd3d_array_reserve((void **)&compiler->shader_phases, &compiler->shader_phases_size, - compiler->shader_phase_count + 1, sizeof(*compiler->shader_phases))) - return; - phase = &compiler->shader_phases[compiler->shader_phase_count]; + if (!is_in_default_phase(compiler)) + spirv_compiler_leave_shader_phase(compiler);
- phase->type = instruction->handler_idx; - phase->idx = compiler->shader_phase_count; - phase->instance_count = 0; - phase->function_id = 0; - phase->instance_id = 0; - phase->function_location = 0; + function_id = vkd3d_spirv_alloc_id(builder);
- ++compiler->shader_phase_count; -} - -static int spirv_compiler_emit_shader_phase_instance_count(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - struct vkd3d_shader_phase *phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; - - if (!compiler->shader_phase_count - || (phase->type != VKD3DSIH_HS_FORK_PHASE && phase->type != VKD3DSIH_HS_JOIN_PHASE) - || phase->function_id) - { - WARN("Unexpected dcl_hs_{fork,join}_phase_instance_count instruction.\n"); - return VKD3D_ERROR_INVALID_SHADER; - } - - phase->instance_count = instruction->declaration.count; - - spirv_compiler_begin_shader_phase(compiler, phase); - - return VKD3D_OK; -} + void_id = vkd3d_spirv_get_op_type_void(builder); + function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, NULL, 0); + vkd3d_spirv_build_op_function(builder, void_id, function_id, + SpvFunctionControlMaskNone, function_type_id);
-static const struct vkd3d_shader_phase *spirv_compiler_get_control_point_phase( - struct spirv_compiler *compiler) -{ - const struct vkd3d_shader_phase *phase; + vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder));
- if (compiler->shader_phase_count < 1) - return NULL; + compiler->phase = instruction->handler_idx; + spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL);
- phase = &compiler->shader_phases[0]; - if (is_control_point_phase(phase)) - return phase; + phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + ? &compiler->control_point_phase : &compiler->patch_constant_phase; + phase->function_id = function_id; + phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream);
- return NULL; + if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + compiler->emit_default_control_point_phase = instruction->flags; }
static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) { - const struct vkd3d_shader_signature *output_signature = compiler->output_signature; - const struct vkd3d_shader_signature *input_signature = compiler->input_signature; + const struct shader_signature *output_signature = &compiler->output_signature; + const struct shader_signature *input_signature = &compiler->input_signature; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; enum vkd3d_shader_component_type component_type; - uint32_t input_id, output_id, dst_id, src_id; struct vkd3d_shader_src_param invocation; struct vkd3d_shader_register input_reg; uint32_t type_id, output_ptr_type_id; + uint32_t input_id, output_id, dst_id; unsigned int component_count; + unsigned int array_sizes[2]; uint32_t invocation_id; unsigned int i;
@@ -6657,6 +6312,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile invocation.reg.idx[0].offset = ~0u; invocation.reg.idx[1].offset = ~0u; invocation.reg.idx[2].offset = ~0u; + invocation.reg.idx_count = 0; invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE;
memset(&input_reg, 0, sizeof(input_reg)); @@ -6664,37 +6320,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile input_reg.data_type = VKD3D_DATA_FLOAT; input_reg.idx[0].rel_addr = &invocation; input_reg.idx[2].offset = ~0u; + input_reg.idx_count = 2; input_id = spirv_compiler_get_register_id(compiler, &input_reg);
assert(input_signature->element_count == output_signature->element_count); for (i = 0; i < output_signature->element_count; ++i) { - const struct vkd3d_shader_signature_element *output = &output_signature->elements[i]; - const struct vkd3d_shader_signature_element *input = &input_signature->elements[i]; + const struct signature_element *output = &output_signature->elements[i]; + const struct signature_element *input = &input_signature->elements[i];
assert(input->mask == output->mask); assert(input->component_type == output->component_type);
- input_reg.idx[1].offset = input->register_index; + input_reg.idx[1].offset = i; input_id = spirv_compiler_get_register_id(compiler, &input_reg); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - src_id = vkd3d_spirv_build_op_load(builder, type_id, input_id, SpvMemoryAccessMaskNone);
component_type = output->component_type; component_count = vkd3d_write_mask_component_count(output->mask); - output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - SpvStorageClassOutput, component_type, component_count, compiler->output_control_point_count); + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + if ((array_sizes[0] = (input->register_count > 1) ? input->register_count : 0)) + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, spirv_compiler_get_constant_uint(compiler, + array_sizes[0])); + + array_sizes[1] = compiler->output_control_point_count; + output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, SpvStorageClassOutput, + component_type, component_count, array_sizes, 2); vkd3d_spirv_add_iface_variable(builder, output_id); vkd3d_spirv_build_op_decorate1(builder, output_id, SpvDecorationLocation, output->register_index); vkd3d_spirv_build_op_name(builder, output_id, "vocp%u", output->register_index);
- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_id, invocation_id);
- spirv_compiler_emit_store(compiler, dst_id, output->mask, - component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_ALL, src_id); + vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); } + + vkd3d_spirv_build_op_return(builder); }
static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, @@ -6723,95 +6384,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi SpvScopeWorkgroup, SpvScopeInvocation, SpvMemorySemanticsMaskNone); }
-static void spirv_compiler_emit_hull_shader_input_initialisation(struct spirv_compiler *compiler) -{ - uint32_t type_id, length_id, register_index_id, src_array_id, dst_array_id, vicp_id, tmp_id; - const struct vkd3d_shader_signature *signature = compiler->input_signature; - uint32_t src_type_id, dst_type_id, src_id, dst_id, point_index_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature_element *element; - enum vkd3d_shader_input_sysval_semantic sysval; - const struct vkd3d_spirv_builtin *builtin; - struct vkd3d_symbol *symbol, symbol_key; - unsigned int register_count, i, j; - struct vkd3d_shader_register r; - struct rb_entry *entry; - uint32_t indices[2]; - - for (i = 0, register_count = 0; i < signature->element_count; ++i) - { - register_count = max(register_count, signature->elements[i].register_index + 1); - } - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); - - memset(&r, 0, sizeof(r)); - r.type = VKD3DSPR_INPUT; - r.idx[0].offset = 0; - r.idx[1].offset = ~0u; - vkd3d_symbol_make_register(&symbol_key, &r); - - for (i = 0; i < signature->element_count; ++i) - { - element = &signature->elements[i]; - - symbol_key.key.reg.idx = element->register_index; - entry = rb_get(&compiler->symbol_table, &symbol_key); - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - - vicp_id = symbol->id; - register_index_id = spirv_compiler_get_constant_uint(compiler, element->register_index); - dst_array_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, type_id, vicp_id, register_index_id); - - if (element->sysval_semantic) - { - sysval = vkd3d_siv_from_sysval(element->sysval_semantic); - builtin = 
get_spirv_builtin_for_sysval(compiler, sysval); - src_array_id = spirv_compiler_emit_builtin_variable(compiler, builtin, - SpvStorageClassInput, compiler->input_control_point_count); - - if (builtin->component_count == 4) - { - vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); - } - else - { - tmp_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, builtin->component_count); - src_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, tmp_id); - dst_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, tmp_id); - - for (j = 0; j < compiler->input_control_point_count; ++j) - { - point_index_id = spirv_compiler_get_constant_uint(compiler, j); - src_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, - src_type_id, src_array_id, point_index_id); - - indices[0] = point_index_id; - indices[1] = spirv_compiler_get_constant_uint(compiler, 0); - dst_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, - dst_type_id, dst_array_id, indices, 2); - - vkd3d_spirv_build_op_copy_memory(builder, dst_id, src_id, SpvMemoryAccessMaskNone); - } - } - } - else - { - src_array_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - SpvStorageClassInput, VKD3D_SHADER_COMPONENT_FLOAT, 4, compiler->input_control_point_count); - vkd3d_spirv_add_iface_variable(builder, src_array_id); - vkd3d_spirv_build_op_decorate1(builder, src_array_id, SpvDecorationLocation, element->register_index); - vkd3d_spirv_build_op_name(builder, src_array_id, "v%u", element->register_index); - - vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); - } - symbol->info.reg.dcl_mask |= element->mask; - } -} - static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -6854,46 +6426,21 @@ static void 
spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_phase *control_point_phase, *phase; - uint32_t phase_instance_id; - unsigned int i, j; uint32_t void_id;
vkd3d_spirv_builder_begin_main_function(builder);
- spirv_compiler_emit_hull_shader_input_initialisation(compiler); - void_id = vkd3d_spirv_get_op_type_void(builder);
- if ((control_point_phase = spirv_compiler_get_control_point_phase(compiler))) - vkd3d_spirv_build_op_function_call(builder, void_id, control_point_phase->function_id, NULL, 0); - else - spirv_compiler_emit_default_control_point_phase(compiler); + vkd3d_spirv_build_op_function_call(builder, void_id, compiler->control_point_phase.function_id, NULL, 0);
if (compiler->use_vocp) spirv_compiler_emit_hull_shader_barrier(compiler);
- for (i = 0; i < compiler->shader_phase_count; ++i) - { - phase = &compiler->shader_phases[i]; - if (is_control_point_phase(phase)) - continue; - - if (phase->instance_count) - { - for (j = 0; j < phase->instance_count; ++j) - { - phase_instance_id = spirv_compiler_get_constant_uint(compiler, j); - vkd3d_spirv_build_op_function_call(builder, - void_id, phase->function_id, &phase_instance_id, 1); - } - } - else - { - vkd3d_spirv_build_op_function_call(builder, void_id, phase->function_id, NULL, 0); - } - } - + /* TODO: only call the patch constant function for invocation 0. The simplest way + * is to avoid use of private variables there, otherwise we would need a separate + * patch constant epilogue also only called from invocation 0. */ + vkd3d_spirv_build_op_function_call(builder, void_id, compiler->patch_constant_phase.function_id, NULL, 0); spirv_compiler_emit_shader_epilogue_invocation(compiler); vkd3d_spirv_build_op_return(builder); vkd3d_spirv_build_op_function_end(builder); @@ -7575,10 +7122,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co static void spirv_compiler_emit_return(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - const struct vkd3d_shader_phase *phase = spirv_compiler_get_current_shader_phase(compiler); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
- if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (!phase || is_control_point_phase(phase))) + if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) + || is_in_control_point_phase(compiler))) spirv_compiler_emit_shader_epilogue_invocation(compiler);
vkd3d_spirv_build_op_return(builder); @@ -7972,12 +7519,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c
if (cf_info) cf_info->inside_block = false; + else + compiler->main_block_open = false; break;
case VKD3DSIH_RETP: spirv_compiler_emit_retc(compiler, instruction); break;
+ case VKD3DSIH_DISCARD: case VKD3DSIH_TEXKILL: spirv_compiler_emit_kill(compiler, instruction); break; @@ -8256,7 +7806,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, image_operands[image_operand_count++] = spirv_compiler_emit_texel_offset(compiler, instruction, image.resource_type_info); } - if (multisample) + if (multisample && image.resource_type_info->ms) { operands_mask |= SpvImageOperandsSampleMask; image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, @@ -9521,58 +9071,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, vkd3d_spirv_build_op_end_primitive(builder); }
-static void spirv_compiler_emit_hull_shader_inputs(struct spirv_compiler *compiler) -{ - const struct vkd3d_shader_signature *signature = compiler->input_signature; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, length_id, vicp_id, vicp_type_id; - unsigned int register_count, register_idx, i; - struct vkd3d_shader_register r; - struct vkd3d_symbol symbol; - struct rb_entry *entry; - - for (i = 0, register_count = 0; i < signature->element_count; ++i) - { - register_count = max(register_count, signature->elements[i].register_index + 1); - } - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - length_id = spirv_compiler_get_constant_uint(compiler, register_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - vicp_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); - - vicp_id = vkd3d_spirv_build_op_variable(builder, - &builder->global_stream, vicp_type_id, SpvStorageClassPrivate, 0); - vkd3d_spirv_build_op_name(builder, vicp_id, "vicp"); - - memset(&r, 0, sizeof(r)); - r.type = VKD3DSPR_INPUT; - r.idx[0].offset = 0; - r.idx[1].offset = ~0u; - vkd3d_symbol_make_register(&symbol, &r); - - for (i = 0; i < signature->element_count; ++i) - { - register_idx = signature->elements[i].register_index; - - symbol.key.reg.idx = register_idx; - if ((entry = rb_get(&compiler->symbol_table, &symbol))) - { - struct vkd3d_symbol *s = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - s->info.reg.dcl_mask |= signature->elements[i].mask; - continue; - } - - vkd3d_symbol_set_register_info(&symbol, vicp_id, SpvStorageClassPrivate, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); - symbol.info.reg.dcl_mask = signature->elements[i].mask; - symbol.info.reg.is_aggregate = true; - 
spirv_compiler_put_symbol(compiler, &symbol); - } -} - /* This function is called after declarations are processed. */ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) { @@ -9581,8 +9079,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) if (compiler->xfb_info && compiler->xfb_info->element_count && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) spirv_compiler_emit_point_size(compiler); - if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_hull_shader_inputs(compiler); }
static bool is_dcl_instruction(enum vkd3d_shader_opcode handler_idx) @@ -9660,9 +9156,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_SIV: spirv_compiler_emit_dcl_output_siv(compiler, instruction); break; - case VKD3DSIH_DCL_INDEX_RANGE: - spirv_compiler_emit_dcl_index_range(compiler, instruction); - break; case VKD3DSIH_DCL_STREAM: spirv_compiler_emit_dcl_stream(compiler, instruction); break; @@ -9699,10 +9192,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_THREAD_GROUP: spirv_compiler_emit_dcl_thread_group(compiler, instruction); break; - case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - ret = spirv_compiler_emit_shader_phase_instance_count(compiler, instruction); - break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: @@ -9826,6 +9315,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CONTINUE: case VKD3DSIH_CONTINUEP: case VKD3DSIH_DEFAULT: + case VKD3DSIH_DISCARD: case VKD3DSIH_ELSE: case VKD3DSIH_ENDIF: case VKD3DSIH_ENDLOOP: @@ -9947,28 +9437,55 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, return ret; }
-int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, struct vkd3d_shader_code *spirv) { - const struct vkd3d_shader_instruction_array *instructions = &parser->instructions; const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_phase *phase; + struct vkd3d_shader_instruction_array instructions; enum vkd3d_result result = VKD3D_OK; unsigned int i;
compiler->location.column = 0; - for (i = 0; i < instructions->count; ++i) + compiler->location.line = 1; + + instructions = parser->instructions; + memset(&parser->instructions, 0, sizeof(parser->instructions)); + + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL + && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) + { + result = instruction_array_normalise_hull_shader_control_point_io(&instructions, + &compiler->input_signature); + } + if (result >= 0) + result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, + &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); + + if (result >= 0 && TRACE_ON()) + vkd3d_shader_trace(&instructions, &parser->shader_version); + + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_shader_signature_outputs(compiler); + + for (i = 0; i < instructions.count && result >= 0; ++i) { compiler->location.line = i + 1; - if ((result = spirv_compiler_handle_instruction(compiler, &instructions->elements[i])) < 0) - return result; + result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); }
- if ((phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_leave_shader_phase(compiler, phase); + shader_instruction_array_destroy(&instructions); + + if (result < 0) + return result; + + if (compiler->main_block_open) + vkd3d_spirv_build_op_return(builder); + + if (!is_in_default_phase(compiler)) + spirv_compiler_leave_shader_phase(compiler); else vkd3d_spirv_build_op_function_end(builder);
@@ -10023,23 +9540,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, return VKD3D_OK; }
-void spirv_compiler_destroy(struct spirv_compiler *compiler) +int spirv_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { - vkd3d_free(compiler->control_flow_info); - - vkd3d_free(compiler->output_info); - - vkd3d_free(compiler->push_constants); - vkd3d_free(compiler->descriptor_offset_ids); - - vkd3d_spirv_builder_free(&compiler->spirv_builder); - - rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); + struct spirv_compiler *spirv_compiler; + int ret;
- vkd3d_free(compiler->shader_phases); - vkd3d_free(compiler->spec_constants); + if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, + compile_info, scan_descriptor_info, message_context, &parser->location))) + { + ERR("Failed to create SPIR-V compiler.\n"); + return VKD3D_ERROR; + }
- vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); + ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out);
- vkd3d_free(compiler); + spirv_compiler_destroy(spirv_compiler); + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c new file mode 100644 index 00000000000..d066b13ee4e --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -0,0 +1,5234 @@ +/* + * TPF (Direct3D shader models 4 and 5 bytecode) support + * + * Copyright 2008-2009 Henri Verbeet for CodeWeavers + * Copyright 2010 Rico Schüller + * Copyright 2017 Józef Kucia for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "hlsl.h" + +#define SM4_MAX_SRC_COUNT 6 +#define SM4_MAX_DST_COUNT 2 + +STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); + +#define VKD3D_SM4_PS 0x0000u +#define VKD3D_SM4_VS 0x0001u +#define VKD3D_SM4_GS 0x0002u +#define VKD3D_SM5_HS 0x0003u +#define VKD3D_SM5_DS 0x0004u +#define VKD3D_SM5_CS 0x0005u +#define VKD3D_SM4_LIB 0xfff0u + +#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) + +#define VKD3D_SM4_MODIFIER_MASK 0x3fu + +#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) + +#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 +#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) +#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 +#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) +#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 +#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 +#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 +#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) + +#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 +#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 +#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) + 
+#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 +#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) + +#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 +#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) + +#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 +#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) + +#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 +#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) + +#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 +#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) + +#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 +#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) + +#define VKD3D_SM5_PRECISE_SHIFT 19 +#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) + +#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 +#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) + +#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 +#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu + +#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 +#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) + +#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 +#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) + +#define VKD3D_SM5_TESSELLATOR_SHIFT 11 +#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) + +#define VKD3D_SM4_OPCODE_MASK 0xff + +#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) + +#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu + +#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 +#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + +#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 +#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) + +#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 +#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) + +#define 
VKD3D_SM4_ADDRESSING_SHIFT2 28 +#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) + +#define VKD3D_SM4_ADDRESSING_SHIFT1 25 +#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) + +#define VKD3D_SM4_ADDRESSING_SHIFT0 22 +#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) + +#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 +#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) + +#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 +#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) + +#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 +#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) + +#define VKD3D_SM4_DIMENSION_SHIFT 0 +#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) + +#define VKD3D_SM4_WRITEMASK_SHIFT 4 +#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) + +#define VKD3D_SM4_SWIZZLE_SHIFT 4 +#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) + +#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 +#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 + +#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 + +#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) + +#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) + +/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. 
*/ +#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 + +enum vkd3d_sm4_opcode +{ + VKD3D_SM4_OP_ADD = 0x00, + VKD3D_SM4_OP_AND = 0x01, + VKD3D_SM4_OP_BREAK = 0x02, + VKD3D_SM4_OP_BREAKC = 0x03, + VKD3D_SM4_OP_CASE = 0x06, + VKD3D_SM4_OP_CONTINUE = 0x07, + VKD3D_SM4_OP_CONTINUEC = 0x08, + VKD3D_SM4_OP_CUT = 0x09, + VKD3D_SM4_OP_DEFAULT = 0x0a, + VKD3D_SM4_OP_DERIV_RTX = 0x0b, + VKD3D_SM4_OP_DERIV_RTY = 0x0c, + VKD3D_SM4_OP_DISCARD = 0x0d, + VKD3D_SM4_OP_DIV = 0x0e, + VKD3D_SM4_OP_DP2 = 0x0f, + VKD3D_SM4_OP_DP3 = 0x10, + VKD3D_SM4_OP_DP4 = 0x11, + VKD3D_SM4_OP_ELSE = 0x12, + VKD3D_SM4_OP_EMIT = 0x13, + VKD3D_SM4_OP_ENDIF = 0x15, + VKD3D_SM4_OP_ENDLOOP = 0x16, + VKD3D_SM4_OP_ENDSWITCH = 0x17, + VKD3D_SM4_OP_EQ = 0x18, + VKD3D_SM4_OP_EXP = 0x19, + VKD3D_SM4_OP_FRC = 0x1a, + VKD3D_SM4_OP_FTOI = 0x1b, + VKD3D_SM4_OP_FTOU = 0x1c, + VKD3D_SM4_OP_GE = 0x1d, + VKD3D_SM4_OP_IADD = 0x1e, + VKD3D_SM4_OP_IF = 0x1f, + VKD3D_SM4_OP_IEQ = 0x20, + VKD3D_SM4_OP_IGE = 0x21, + VKD3D_SM4_OP_ILT = 0x22, + VKD3D_SM4_OP_IMAD = 0x23, + VKD3D_SM4_OP_IMAX = 0x24, + VKD3D_SM4_OP_IMIN = 0x25, + VKD3D_SM4_OP_IMUL = 0x26, + VKD3D_SM4_OP_INE = 0x27, + VKD3D_SM4_OP_INEG = 0x28, + VKD3D_SM4_OP_ISHL = 0x29, + VKD3D_SM4_OP_ISHR = 0x2a, + VKD3D_SM4_OP_ITOF = 0x2b, + VKD3D_SM4_OP_LABEL = 0x2c, + VKD3D_SM4_OP_LD = 0x2d, + VKD3D_SM4_OP_LD2DMS = 0x2e, + VKD3D_SM4_OP_LOG = 0x2f, + VKD3D_SM4_OP_LOOP = 0x30, + VKD3D_SM4_OP_LT = 0x31, + VKD3D_SM4_OP_MAD = 0x32, + VKD3D_SM4_OP_MIN = 0x33, + VKD3D_SM4_OP_MAX = 0x34, + VKD3D_SM4_OP_SHADER_DATA = 0x35, + VKD3D_SM4_OP_MOV = 0x36, + VKD3D_SM4_OP_MOVC = 0x37, + VKD3D_SM4_OP_MUL = 0x38, + VKD3D_SM4_OP_NE = 0x39, + VKD3D_SM4_OP_NOP = 0x3a, + VKD3D_SM4_OP_NOT = 0x3b, + VKD3D_SM4_OP_OR = 0x3c, + VKD3D_SM4_OP_RESINFO = 0x3d, + VKD3D_SM4_OP_RET = 0x3e, + VKD3D_SM4_OP_RETC = 0x3f, + VKD3D_SM4_OP_ROUND_NE = 0x40, + VKD3D_SM4_OP_ROUND_NI = 0x41, + VKD3D_SM4_OP_ROUND_PI = 0x42, + VKD3D_SM4_OP_ROUND_Z = 0x43, + VKD3D_SM4_OP_RSQ = 0x44, + VKD3D_SM4_OP_SAMPLE = 0x45, + 
VKD3D_SM4_OP_SAMPLE_C = 0x46, + VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, + VKD3D_SM4_OP_SAMPLE_LOD = 0x48, + VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, + VKD3D_SM4_OP_SAMPLE_B = 0x4a, + VKD3D_SM4_OP_SQRT = 0x4b, + VKD3D_SM4_OP_SWITCH = 0x4c, + VKD3D_SM4_OP_SINCOS = 0x4d, + VKD3D_SM4_OP_UDIV = 0x4e, + VKD3D_SM4_OP_ULT = 0x4f, + VKD3D_SM4_OP_UGE = 0x50, + VKD3D_SM4_OP_UMUL = 0x51, + VKD3D_SM4_OP_UMAX = 0x53, + VKD3D_SM4_OP_UMIN = 0x54, + VKD3D_SM4_OP_USHR = 0x55, + VKD3D_SM4_OP_UTOF = 0x56, + VKD3D_SM4_OP_XOR = 0x57, + VKD3D_SM4_OP_DCL_RESOURCE = 0x58, + VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, + VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, + VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, + VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, + VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, + VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, + VKD3D_SM4_OP_DCL_INPUT = 0x5f, + VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, + VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, + VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, + VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, + VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, + VKD3D_SM4_OP_DCL_OUTPUT = 0x65, + VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, + VKD3D_SM4_OP_DCL_TEMPS = 0x68, + VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, + VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, + VKD3D_SM4_OP_LOD = 0x6c, + VKD3D_SM4_OP_GATHER4 = 0x6d, + VKD3D_SM4_OP_SAMPLE_POS = 0x6e, + VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, + VKD3D_SM5_OP_HS_DECLS = 0x71, + VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, + VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, + VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, + VKD3D_SM5_OP_EMIT_STREAM = 0x75, + VKD3D_SM5_OP_CUT_STREAM = 0x76, + VKD3D_SM5_OP_FCALL = 0x78, + VKD3D_SM5_OP_BUFINFO = 0x79, + VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, + VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, + VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, + VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, + VKD3D_SM5_OP_GATHER4_C = 0x7e, + VKD3D_SM5_OP_GATHER4_PO = 0x7f, + VKD3D_SM5_OP_GATHER4_PO_C = 0x80, + VKD3D_SM5_OP_RCP = 0x81, + VKD3D_SM5_OP_F32TOF16 = 0x82, + VKD3D_SM5_OP_F16TOF32 = 0x83, + VKD3D_SM5_OP_COUNTBITS = 0x86, + 
VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, + VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, + VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, + VKD3D_SM5_OP_UBFE = 0x8a, + VKD3D_SM5_OP_IBFE = 0x8b, + VKD3D_SM5_OP_BFI = 0x8c, + VKD3D_SM5_OP_BFREV = 0x8d, + VKD3D_SM5_OP_SWAPC = 0x8e, + VKD3D_SM5_OP_DCL_STREAM = 0x8f, + VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, + VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, + VKD3D_SM5_OP_DCL_INTERFACE = 0x92, + VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, + VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, + VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, + VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, + VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, + VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, + VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, + VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, + VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, + VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, + VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, + VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, + VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, + VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, + VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, + VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, + VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, + VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, + VKD3D_SM5_OP_LD_RAW = 0xa5, + VKD3D_SM5_OP_STORE_RAW = 0xa6, + VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, + VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, + VKD3D_SM5_OP_ATOMIC_AND = 0xa9, + VKD3D_SM5_OP_ATOMIC_OR = 0xaa, + VKD3D_SM5_OP_ATOMIC_XOR = 0xab, + VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, + VKD3D_SM5_OP_ATOMIC_IADD = 0xad, + VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, + VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, + VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, + VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, + VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, + VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, + VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, + VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, + VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, + VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, + VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, + VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, + 
VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, + VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, + VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, + VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, + VKD3D_SM5_OP_SYNC = 0xbe, + VKD3D_SM5_OP_DADD = 0xbf, + VKD3D_SM5_OP_DMAX = 0xc0, + VKD3D_SM5_OP_DMIN = 0xc1, + VKD3D_SM5_OP_DMUL = 0xc2, + VKD3D_SM5_OP_DEQ = 0xc3, + VKD3D_SM5_OP_DGE = 0xc4, + VKD3D_SM5_OP_DLT = 0xc5, + VKD3D_SM5_OP_DNE = 0xc6, + VKD3D_SM5_OP_DMOV = 0xc7, + VKD3D_SM5_OP_DMOVC = 0xc8, + VKD3D_SM5_OP_DTOF = 0xc9, + VKD3D_SM5_OP_FTOD = 0xca, + VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, + VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, + VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, + VKD3D_SM5_OP_DDIV = 0xd2, + VKD3D_SM5_OP_DFMA = 0xd3, + VKD3D_SM5_OP_DRCP = 0xd4, + VKD3D_SM5_OP_MSAD = 0xd5, + VKD3D_SM5_OP_DTOI = 0xd6, + VKD3D_SM5_OP_DTOU = 0xd7, + VKD3D_SM5_OP_ITOD = 0xd8, + VKD3D_SM5_OP_UTOD = 0xd9, + VKD3D_SM5_OP_GATHER4_S = 0xdb, + VKD3D_SM5_OP_GATHER4_C_S = 0xdc, + VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, + VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, + VKD3D_SM5_OP_LD_S = 0xdf, + VKD3D_SM5_OP_LD2DMS_S = 0xe0, + VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, + VKD3D_SM5_OP_LD_RAW_S = 0xe2, + VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, + VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, + VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, + VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, + VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, + VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, + VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, + VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, +}; + +enum vkd3d_sm4_instruction_modifier +{ + VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, + VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, + VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, +}; + +enum vkd3d_sm4_register_type +{ + VKD3D_SM4_RT_TEMP = 0x00, + VKD3D_SM4_RT_INPUT = 0x01, + VKD3D_SM4_RT_OUTPUT = 0x02, + VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, + VKD3D_SM4_RT_IMMCONST = 0x04, + VKD3D_SM4_RT_IMMCONST64 = 0x05, + VKD3D_SM4_RT_SAMPLER = 0x06, + VKD3D_SM4_RT_RESOURCE = 0x07, + VKD3D_SM4_RT_CONSTBUFFER = 0x08, + VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, + VKD3D_SM4_RT_PRIMID = 0x0b, + 
VKD3D_SM4_RT_DEPTHOUT = 0x0c, + VKD3D_SM4_RT_NULL = 0x0d, + VKD3D_SM4_RT_RASTERIZER = 0x0e, + VKD3D_SM4_RT_OMASK = 0x0f, + VKD3D_SM5_RT_STREAM = 0x10, + VKD3D_SM5_RT_FUNCTION_BODY = 0x11, + VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, + VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, + VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, + VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, + VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, + VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, + VKD3D_SM5_RT_UAV = 0x1e, + VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, + VKD3D_SM5_RT_THREAD_ID = 0x20, + VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, + VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, + VKD3D_SM5_RT_COVERAGE = 0x23, + VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, + VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, + VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, +}; + +enum vkd3d_sm4_extended_operand_type +{ + VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, + VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, +}; + +enum vkd3d_sm4_register_modifier +{ + VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, + VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, + VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, + VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, +}; + +enum vkd3d_sm4_register_precision +{ + VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, + VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, + VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, +}; + +enum vkd3d_sm4_output_primitive_type +{ + VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, + VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, + VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, +}; + +enum vkd3d_sm4_input_primitive_type +{ + VKD3D_SM4_INPUT_PT_POINT = 0x01, + VKD3D_SM4_INPUT_PT_LINE = 0x02, + VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, + VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, + VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, + 
VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, + VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, + VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, + VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, + VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, + VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, + VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, + VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, + VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, + VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, + VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, + VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, + VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, + VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, + VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, + VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, + VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, + VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, + VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, + VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, + VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, + VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, + VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, + VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, + VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, + VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, + VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, + VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, + VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, + VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, + VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, + VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, +}; + +enum vkd3d_sm4_swizzle_type +{ + VKD3D_SM4_SWIZZLE_NONE = 0x0, + VKD3D_SM4_SWIZZLE_VEC4 = 0x1, + VKD3D_SM4_SWIZZLE_SCALAR = 0x2, +}; + +enum vkd3d_sm4_dimension +{ + VKD3D_SM4_DIMENSION_NONE = 0x0, + VKD3D_SM4_DIMENSION_SCALAR = 0x1, + VKD3D_SM4_DIMENSION_VEC4 = 0x2, +}; + +enum vkd3d_sm4_resource_type +{ + VKD3D_SM4_RESOURCE_BUFFER = 0x1, + VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, + VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, + VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, + VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, + VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, + VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, + VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, + VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, + VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, + VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, + VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, +}; + +enum 
vkd3d_sm4_data_type +{ + VKD3D_SM4_DATA_UNORM = 0x1, + VKD3D_SM4_DATA_SNORM = 0x2, + VKD3D_SM4_DATA_INT = 0x3, + VKD3D_SM4_DATA_UINT = 0x4, + VKD3D_SM4_DATA_FLOAT = 0x5, + VKD3D_SM4_DATA_MIXED = 0x6, + VKD3D_SM4_DATA_DOUBLE = 0x7, + VKD3D_SM4_DATA_CONTINUED = 0x8, + VKD3D_SM4_DATA_UNUSED = 0x9, +}; + +enum vkd3d_sm4_sampler_mode +{ + VKD3D_SM4_SAMPLER_DEFAULT = 0x0, + VKD3D_SM4_SAMPLER_COMPARISON = 0x1, +}; + +enum vkd3d_sm4_shader_data_type +{ + VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, + VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, +}; + +struct sm4_index_range +{ + unsigned int index; + unsigned int count; + unsigned int mask; +}; + +struct sm4_index_range_array +{ + unsigned int count; + struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; +}; + +struct vkd3d_shader_sm4_parser +{ + const uint32_t *start, *end, *ptr; + + unsigned int output_map[MAX_REG_OUTPUT]; + + enum vkd3d_shader_opcode phase; + bool has_control_point_phase; + unsigned int input_register_masks[MAX_REG_OUTPUT]; + unsigned int output_register_masks[MAX_REG_OUTPUT]; + unsigned int patch_constant_register_masks[MAX_REG_OUTPUT]; + + struct sm4_index_range_array input_index_ranges; + struct sm4_index_range_array output_index_ranges; + struct sm4_index_range_array patch_constant_index_ranges; + + struct vkd3d_shader_parser p; +}; + +struct vkd3d_sm4_opcode_info +{ + enum vkd3d_sm4_opcode opcode; + enum vkd3d_shader_opcode handler_idx; + char dst_info[SM4_MAX_DST_COUNT]; + char src_info[SM4_MAX_SRC_COUNT]; + void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); +}; + +static const enum vkd3d_primitive_type output_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, 
+ /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, +}; + +static const enum vkd3d_primitive_type input_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, + /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, + /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, + /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, +}; + +static const enum vkd3d_shader_resource_type resource_type_table[] = +{ + /* 0 */ VKD3D_SHADER_RESOURCE_NONE, + /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, + /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, + /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, + /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, +}; + +static const enum vkd3d_data_type data_type_table[] = +{ + /* 0 */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, + /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, + /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, + /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, + /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, + /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, + /* 
VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, + /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, +}; + +static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); +} + +static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) +{ + const struct vkd3d_shader_version *version = &sm4->p.shader_version; + + return version->major >= 5 && version->minor >= 1; +} + +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); + +static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, + const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) +{ + *register_space = 0; + + if (!shader_is_sm_5_1(priv)) + return true; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + + *register_space = *(*ptr)++; + return true; +} + +static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, + (struct vkd3d_shader_src_param *)&ins->src[0]); + ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? 
+ VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; +} + +static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_immediate_constant_buffer *icb; + enum vkd3d_sm4_shader_data_type type; + unsigned int icb_size; + + type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; + if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) + { + FIXME("Ignoring shader data type %#x.\n", type); + ins->handler_idx = VKD3DSIH_NOP; + return; + } + + ++tokens; + icb_size = token_count - 1; + if (icb_size % 4) + { + FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + + if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) + { + ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + icb->vec4_count = icb_size / 4; + memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); + shader_instruction_array_add_icb(&priv->p.instructions, icb); + ins->declaration.icb = icb; +} + +static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, + const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) +{ + range->first = reg->idx[1].offset; + range->last = reg->idx[shader_is_sm_5_1(sm4) ? 
2 : 1].offset; + if (range->last < range->first) + { + FIXME("Invalid register range [%u:%u].\n", range->first, range->last); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, + "Last register %u must not be less than first register %u in range.", range->last, range->first); + } +} + +static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; + enum vkd3d_sm4_resource_type resource_type; + const uint32_t *end = &tokens[token_count]; + enum vkd3d_sm4_data_type data_type; + enum vkd3d_data_type reg_data_type; + DWORD components; + unsigned int i; + + resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; + if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) + { + FIXME("Unhandled resource type %#x.\n", resource_type); + semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + else + { + semantic->resource_type = resource_type_table[resource_type]; + } + + if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + { + semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) + >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; + shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); + + components = *tokens++; + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + semantic->resource_data_type[i] = data_type_table[data_type]; + } + } + + if (reg_data_type == VKD3D_DATA_UAV) + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + + shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); +} + +static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); + if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) + ins->flags |= VKD3DSI_INDEXED_DYNAMIC; + + ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; + ins->declaration.cb.range.space = 0; + + if (shader_is_sm_5_1(priv)) + { + if (tokens >= end) + { + FIXME("Invalid ptr %p >= end %p.\n", tokens, end); + return; + } + + ins->declaration.cb.size = *tokens++; + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); + } +} + +static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + 
const uint32_t *end = &tokens[token_count];
+
+    ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT;
+    if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON)
+        FIXME("Unhandled sampler mode %#x.\n", ins->flags);
+    shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src);
+    shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range);
+    shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space);
+}
+/* Returns true while the parser's current phase is a hull shader fork or join phase. */
+static bool sm4_parser_is_in_fork_or_join_phase(const struct vkd3d_shader_sm4_parser *sm4)
+{
+    return sm4->phase == VKD3DSIH_HS_FORK_PHASE || sm4->phase == VKD3DSIH_HS_JOIN_PHASE;
+}
+/*
+ * Reads a dcl_index_range declaration and validates it against earlier
+ * declarations.
+ *
+ * The base register is the last index of the destination register and the
+ * count is the token after it. The register type selects which per-parser
+ * mask array and range list are used (input, output or patch constant;
+ * VKD3DSPR_OUTPUT maps to patch constant during fork/join phases). A parser
+ * error is raised, and the function returns early, when:
+ *   - the register type is not one of the handled I/O types;
+ *   - the range overlaps a previously recorded range that shares a mask bit
+ *     and has a different base;
+ *   - some register in the range was not declared with the full write mask.
+ * A non-scalar write mask only produces a warning.
+ */
+static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range;
+    unsigned int i, register_idx, register_count, write_mask;
+    enum vkd3d_shader_register_type type;
+    struct sm4_index_range_array *ranges;
+    unsigned int *io_masks;
+
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE,
+            &index_range->dst);
+    index_range->register_count = *tokens;
+    /* NOTE(review): idx[idx_count - 1] assumes idx_count >= 1 - confirm the
+     * dst param reader guarantees that for these register types. */
+    register_idx = index_range->dst.reg.idx[index_range->dst.reg.idx_count - 1].offset;
+    register_count = index_range->register_count;
+    write_mask = index_range->dst.write_mask;
+
+    if (vkd3d_write_mask_component_count(write_mask) != 1)
+    {
+        WARN("Unhandled write mask %#x.\n", write_mask);
+        vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK,
+                "Index range mask %#x is not scalar.", write_mask);
+    }
+
+    switch ((type = index_range->dst.reg.type))
+    {
+        case VKD3DSPR_INPUT:
+        case VKD3DSPR_INCONTROLPOINT:
+            io_masks = priv->input_register_masks;
+            ranges = &priv->input_index_ranges;
+            break;
+        case VKD3DSPR_OUTPUT:
+            if (sm4_parser_is_in_fork_or_join_phase(priv))
+            {
+                io_masks = priv->patch_constant_register_masks;
+                ranges = &priv->patch_constant_index_ranges;
+            }
+            else
+            {
+                io_masks = priv->output_register_masks;
+                ranges = &priv->output_index_ranges;
+            }
+            break;
+        case VKD3DSPR_COLOROUT:
+        case VKD3DSPR_OUTCONTROLPOINT:
+            io_masks = priv->output_register_masks;
+            ranges = &priv->output_index_ranges;
+            break;
+        case VKD3DSPR_PATCHCONST:
+            io_masks = priv->patch_constant_register_masks;
+            ranges = &priv->patch_constant_index_ranges;
+            break;
+
+        default:
+            WARN("Unhandled register type %#x.\n", type);
+            vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL,
+                    "Invalid register type %#x for index range base %u, count %u, mask %#x.",
+                    type, register_idx, register_count, write_mask);
+            return;
+    }
+    /* Reject overlap with any earlier range that shares a mask component. */
+    for (i = 0; i < ranges->count; ++i)
+    {
+        struct sm4_index_range r = ranges->ranges[i];
+
+        if (!(r.mask & write_mask))
+            continue;
+        /* Ranges with the same base but different lengths are not an issue. */
+        if (register_idx == r.index)
+            continue;
+
+        if ((r.index <= register_idx && register_idx - r.index < r.count)
+                || (register_idx < r.index && r.index - register_idx < register_count))
+        {
+            WARN("Detected index range collision for base %u, count %u, mask %#x.\n",
+                    register_idx, register_count, write_mask);
+            vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL,
+                    "Register index range base %u, count %u, mask %#x collides with a previous declaration.",
+                    register_idx, register_count, write_mask);
+            return;
+        }
+    }
+    ranges->ranges[ranges->count].index = register_idx;
+    ranges->ranges[ranges->count].count = register_count;
+    ranges->ranges[ranges->count++].mask = write_mask;
+    /* NOTE(review): neither the append above nor io_masks[register_idx + i]
+     * below is bounds-checked here; the array capacities are not visible in
+     * this chunk - confirm they are validated elsewhere. */
+    for (i = 0; i < register_count; ++i)
+    {
+        if ((io_masks[register_idx + i] & write_mask) != write_mask)
+        {
+            WARN("No matching declaration for index range base %u, count %u, mask %#x.\n",
+                    register_idx, register_count, write_mask);
+            vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL,
+                    "Input/output registers matching index range base %u, count %u, mask %#x were not declared.",
+                    register_idx, register_count, write_mask);
+            return;
+        }
+    }
+}
+/*
+ * Maps the primitive type field of the opcode token through
+ * output_primitive_type_table; out-of-table values become VKD3D_PT_UNDEFINED,
+ * and any VKD3D_PT_UNDEFINED result is reported with a FIXME.
+ */
+static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    enum vkd3d_sm4_output_primitive_type primitive_type;
+
+    primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT;
+    if (primitive_type >= ARRAY_SIZE(output_primitive_type_table))
+        ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED;
+    else
+        ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type];
+
+    if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED)
+        FIXME("Unhandled output primitive type %#x.\n", primitive_type);
+}
+/*
+ * Decodes the input primitive type from the opcode token. Values in
+ * [VKD3D_SM5_INPUT_PT_PATCH1, VKD3D_SM5_INPUT_PT_PATCH32] become VKD3D_PT_PATCH
+ * with a 1-based vertex count; other values go through
+ * input_primitive_type_table, falling back to VKD3D_PT_UNDEFINED.
+ */
+static void shader_sm4_read_dcl_input_primitive(struct
vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    enum vkd3d_sm4_input_primitive_type primitive_type;
+
+    primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT;
+    if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32)
+    {
+        ins->declaration.primitive_type.type = VKD3D_PT_PATCH;
+        ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1;
+    }
+    else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table))
+    {
+        ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED;
+    }
+    else
+    {
+        ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type];
+    }
+
+    if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED)
+        FIXME("Unhandled input primitive type %#x.\n", primitive_type);
+}
+/* Stores the first operand token as declaration.count; opcode_table shares this
+ * reader between several count-style declarations (temps, vertices out, ...). */
+static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.count = *tokens;
+}
+/* Reads a single destination parameter into declaration.dst (dcl_input / dcl_output). */
+static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst);
+}
+/* Reads a destination register followed by one token stored as its system-value semantic. */
+static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT,
+            &ins->declaration.register_semantic.reg);
+    ins->declaration.register_semantic.sysval_semantic = *tokens;
+}
+/* Pixel shader input: interpolation mode from the opcode token, then the destination register. */
+static void
shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT;
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst);
+}
+
+/* Pixel shader system-value input: interpolation mode from the opcode token,
+ * destination register, then one token stored as the system-value semantic. */
+static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT;
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT,
+            &ins->declaration.register_semantic.reg);
+    ins->declaration.register_semantic.sysval_semantic = *tokens;
+}
+
+/* Three consecutive tokens: register index, register size, component count. */
+static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.indexable_temp.register_idx = *tokens++;
+    ins->declaration.indexable_temp.register_size = *tokens++;
+    ins->declaration.indexable_temp.component_count = *tokens;
+}
+
+/* The global flags live entirely in the opcode token; no operand tokens are read. */
+static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT;
+}
+
+/*
+ * Reads an fcall instruction: one token holding the function body index,
+ * followed by a source parameter.
+ *
+ * The end pointer is computed before the first token is consumed. Deriving it
+ * from the already advanced pointer would place the bound one token past the
+ * end of the instruction.
+ */
+static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token,
+        const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src;
+    const uint32_t *end = &tokens[token_count];
+
+    src_params[0].reg.u.fp_body_idx = *tokens++;
+    shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_OPAQUE, &src_params[0]);
+}
+
+/* Stores the single operand token as the declared function body index. */
+static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.index = *tokens;
+}
+
+/* Stores the table index; the following token (a count) is only logged and
+ * the function bodies it announces are ignored. */
+static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.index = *tokens++;
+    FIXME("Ignoring set of function bodies (count %u).\n", *tokens);
+}
+
+/* Reads interface index and body count, then splits the third token into
+ * array size (high bits) and table count (masked low bits). The function
+ * tables themselves are ignored. */
+static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.fp.index = *tokens++;
+    ins->declaration.fp.body_count = *tokens++;
+    ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT;
+    ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK;
+    FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count);
+}
+
+/* The control point count is encoded in the opcode token itself. */
+static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK)
+            >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT;
+}
+
+/* The tessellator domain is encoded in the opcode token itself. */
+static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
+            >> VKD3D_SM5_TESSELLATOR_SHIFT;
+}
+
+/* The tessellator partitioning mode is encoded in the opcode token itself. */
+static void
shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
+            >> VKD3D_SM5_TESSELLATOR_SHIFT;
+}
+
+/* The tessellator output primitive is encoded in the opcode token itself. */
+static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
+            >> VKD3D_SM5_TESSELLATOR_SHIFT;
+}
+
+/*
+ * Reads the maximum tessellation factor, stored in the token stream as the
+ * raw bits of a float.
+ *
+ * The bits are reinterpreted through a union instead of dereferencing the
+ * const uint32_t pointer as a float pointer: the cast accessed a uint32_t
+ * object through an incompatible lvalue type and dropped the const qualifier.
+ */
+static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    union
+    {
+        uint32_t u;
+        float f;
+    } value;
+
+    value.u = *tokens;
+    ins->declaration.max_tessellation_factor = value.f;
+}
+
+/* Three consecutive tokens: thread group size in x, y and z. */
+static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.thread_group_size.x = *tokens++;
+    ins->declaration.thread_group_size.y = *tokens++;
+    ins->declaration.thread_group_size.z = *tokens++;
+}
+
+/* Raw UAV declaration: register, descriptor range, UAV flags from the opcode
+ * token, then the register space. */
+static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token,
+        const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource;
+    const uint32_t *end = &tokens[token_count];
+
+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg);
+    shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
+    ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >>
VKD3D_SM5_UAV_FLAGS_SHIFT;
+    shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
+}
+/*
+ * Structured UAV declaration: register, descriptor range, UAV flags from the
+ * opcode token, one token of byte stride (a stride that is not a multiple of
+ * 4 is only reported), then the register space.
+ */
+static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource;
+    const uint32_t *end = &tokens[token_count];
+
+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg);
+    shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
+    ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT;
+    resource->byte_stride = *tokens++;
+    if (resource->byte_stride % 4)
+        FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride);
+    shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
+}
+/* Raw thread-group shared memory: register, then one token of byte count
+ * (a count that is not a multiple of 4 is only reported). */
+static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg);
+    ins->declaration.tgsm_raw.byte_count = *tokens;
+    if (ins->declaration.tgsm_raw.byte_count % 4)
+        FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count);
+}
+/* Structured thread-group shared memory: register, byte stride token, then
+ * structure count token (a stride that is not a multiple of 4 is only reported). */
+static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT,
+            &ins->declaration.tgsm_structured.reg);
+    ins->declaration.tgsm_structured.byte_stride = *tokens++;
+    ins->declaration.tgsm_structured.structure_count = *tokens;
+    if
(ins->declaration.tgsm_structured.byte_stride % 4)
+        FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride);
+}
+/*
+ * Structured (read-only) resource declaration: register, descriptor range,
+ * one token of byte stride (a stride that is not a multiple of 4 is only
+ * reported), then the register space. Unlike the UAV variant, ins->flags is
+ * not written here.
+ */
+static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource;
+    const uint32_t *end = &tokens[token_count];
+
+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg);
+    shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
+    resource->byte_stride = *tokens++;
+    if (resource->byte_stride % 4)
+        FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride);
+    shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
+}
+/* Raw (read-only) resource declaration: register, descriptor range, register space. */
+static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource;
+    const uint32_t *end = &tokens[token_count];
+
+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg);
+    shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
+    shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
+}
+/* The sync flags live entirely in the opcode token; no operand tokens are read. */
+static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token,
+        const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT;
+}
+
+/*
+ * d -> VKD3D_DATA_DOUBLE
+ * f -> VKD3D_DATA_FLOAT
+ * i -> VKD3D_DATA_INT
+ * u -> VKD3D_DATA_UINT
+ 
* O -> VKD3D_DATA_OPAQUE + * R -> VKD3D_DATA_RESOURCE + * S -> VKD3D_DATA_SAMPLER + * U -> VKD3D_DATA_UAV + */ +static const struct vkd3d_sm4_opcode_info opcode_table[] = +{ + {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, + {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, + {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, + {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, + {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, + {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, + {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, + {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, + {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, + {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, + {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, + {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, + {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, + {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, + {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, + {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, + {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, + {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, + {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, + {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, + {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, + {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, + {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, + {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, + {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, + {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, + {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, + {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, + {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, + {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, + {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, 
"i", "ii"}, + {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, + {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, + {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, + {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, + {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, + {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, + {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, + {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, + {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, + {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, + {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, + {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, + {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, + {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, + {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, + {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", + shader_sm4_read_shader_data}, + {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, + {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, + {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, + {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, + {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, + {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, + {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, + {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, + {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, + {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, + {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, + {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, + {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, + {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, + {VKD3D_SM4_OP_SQRT, 
VKD3DSIH_SQRT, "f", "f"}, + {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, + {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, + {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, + {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, + {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, + {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, + {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, + {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, + {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, + {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, + {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, + {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", + shader_sm4_read_dcl_resource}, + {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", + shader_sm4_read_dcl_constant_buffer}, + {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", + shader_sm4_read_dcl_sampler}, + {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", + shader_sm4_read_dcl_index_range}, + {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", + shader_sm4_read_dcl_output_topology}, + {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", + shader_sm4_read_dcl_input_primitive}, + {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", + shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", + shader_sm4_read_dcl_input_ps}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", + shader_sm4_read_dcl_input_ps_siv}, + {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", + 
shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", + shader_sm4_read_dcl_indexable_temp}, + {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", + shader_sm4_read_dcl_global_flags}, + {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, + {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, + {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, + {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, + {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, + {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, + {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, + {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", + shader_sm5_read_fcall}, + {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, + {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, + {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, + {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, + {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, + {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, + {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, + 
{VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, + {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, + {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, + {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, + {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, + {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, + {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, + {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", + shader_sm5_read_dcl_function_body}, + {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", + shader_sm5_read_dcl_function_table}, + {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", + shader_sm5_read_dcl_interface}, + {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", + shader_sm5_read_dcl_tessellator_domain}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", + shader_sm5_read_dcl_tessellator_partitioning}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", + shader_sm5_read_dcl_tessellator_output_primitive}, + {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", + shader_sm5_read_dcl_hs_max_tessfactor}, + {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", + shader_sm5_read_dcl_thread_group}, + {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", + shader_sm4_read_dcl_resource}, + 
{VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", + shader_sm5_read_dcl_uav_raw}, + {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", + shader_sm5_read_dcl_uav_structured}, + {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", + shader_sm5_read_dcl_tgsm_raw}, + {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", + shader_sm5_read_dcl_tgsm_structured}, + {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", + shader_sm5_read_dcl_resource_raw}, + {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", + shader_sm5_read_dcl_resource_structured}, + {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, + {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, + {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, + {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, + {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, + {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, + {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, + {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_XOR, 
VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, + {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", + shader_sm5_read_sync}, + {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, + {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, + {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, + {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, + {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, + {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, + {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, + {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, + {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, + {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, + {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, + {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, + {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, + {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, + {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, + {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, + {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, + {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, + {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, + {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, + {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, + {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, + {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, + {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C_S, 
VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, + {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, + {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, + {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, + {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, + {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, + {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, + {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, + {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, + {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, +}; + +static const enum vkd3d_shader_register_type register_type_table[] = +{ + /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, + /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, + /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, + /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, + /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, + /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, + /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, + /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, + /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, + /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, + /* UNKNOWN */ ~0u, + /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, + /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, + /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, + /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, + /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, + /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, + /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, + /* UNKNOWN */ ~0u, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, + /* 
VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, + /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, + /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, + /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, + /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, + /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, + /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, + /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, + /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, + /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, + /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, + /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, + /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, + /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, + /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, + /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, + /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, +}; + +static const enum vkd3d_shader_register_precision register_precision_table[] = +{ + /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, + /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, +}; + +static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) +{ + unsigned int i; + + for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) + { + if (opcode == opcode_table[i].opcode) return &opcode_table[i]; + } + + return NULL; +} + +static void map_register(const struct 
vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) +{ + switch (sm4->p.shader_version.type) + { + case VKD3D_SHADER_TYPE_PIXEL: + if (reg->type == VKD3DSPR_OUTPUT) + { + unsigned int reg_idx = reg->idx[0].offset; + + if (reg_idx >= ARRAY_SIZE(sm4->output_map)) + { + /* Validated later */ + break; + } + + reg->type = VKD3DSPR_COLOROUT; + reg->idx[0].offset = sm4->output_map[reg_idx]; + } + break; + + default: + break; + } +} + +static enum vkd3d_data_type map_data_type(char t) +{ + switch (t) + { + case 'd': + return VKD3D_DATA_DOUBLE; + case 'f': + return VKD3D_DATA_FLOAT; + case 'i': + return VKD3D_DATA_INT; + case 'u': + return VKD3D_DATA_UINT; + case 'O': + return VKD3D_DATA_OPAQUE; + case 'R': + return VKD3D_DATA_RESOURCE; + case 'S': + return VKD3D_DATA_SAMPLER; + case 'U': + return VKD3D_DATA_UAV; + default: + ERR("Invalid data type '%c'.\n", t); + return VKD3D_DATA_FLOAT; + } +} + +static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + + shader_instruction_array_destroy(&parser->instructions); + free_shader_desc(&parser->shader_desc); + vkd3d_free(sm4); +} + +static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) +{ + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) + { + struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); + + if (!(reg_idx->rel_addr = rel_addr)) + { + ERR("Failed to get src param for relative addressing.\n"); + return false; + } + + if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) + reg_idx->offset = *(*ptr)++; + else + reg_idx->offset = 0; + shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); + } + else + { + reg_idx->rel_addr = NULL; + reg_idx->offset = *(*ptr)++; + } + + return true; +} + +static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type 
register_type) +{ + switch (register_type) + { + case VKD3D_SM4_RT_SAMPLER: + case VKD3D_SM4_RT_RESOURCE: + case VKD3D_SM4_RT_CONSTBUFFER: + case VKD3D_SM5_RT_UAV: + return true; + + default: + return false; + } +} + +static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, + enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) +{ + enum vkd3d_sm4_register_precision precision; + enum vkd3d_sm4_register_type register_type; + enum vkd3d_sm4_extended_operand_type type; + enum vkd3d_sm4_register_modifier m; + uint32_t token, order, extended; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = *(*ptr)++; + + register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; + if (register_type >= ARRAY_SIZE(register_type_table) + || register_type_table[register_type] == VKD3DSPR_INVALID) + { + FIXME("Unhandled register type %#x.\n", register_type); + param->type = VKD3DSPR_TEMP; + } + else + { + param->type = register_type_table[register_type]; + } + param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + param->non_uniform = false; + param->data_type = data_type; + + *modifier = VKD3DSPSM_NONE; + if (token & VKD3D_SM4_EXTENDED_OPERAND) + { + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + extended = *(*ptr)++; + + if (extended & VKD3D_SM4_EXTENDED_OPERAND) + { + FIXME("Skipping second-order extended operand.\n"); + *ptr += *ptr < end; + } + + type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; + if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) + { + m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; + switch (m) + { + case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: + *modifier = VKD3DSPSM_NEG; + break; + + case VKD3D_SM4_REGISTER_MODIFIER_ABS: + *modifier = VKD3DSPSM_ABS; + break; + + case 
VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: + *modifier = VKD3DSPSM_ABSNEG; + break; + + default: + FIXME("Unhandled register modifier %#x.\n", m); + /* fall-through */ + case VKD3D_SM4_REGISTER_MODIFIER_NONE: + break; + } + + precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; + if (precision >= ARRAY_SIZE(register_precision_table) + || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) + { + FIXME("Unhandled register precision %#x.\n", precision); + param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; + } + else + { + param->precision = register_precision_table[precision]; + } + + if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) + param->non_uniform = true; + + extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK + | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK + | VKD3D_SM4_EXTENDED_OPERAND); + if (extended) + FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); + } + else if (type) + { + FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); + } + } + + order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; + + if (order < 1) + { + param->idx[0].offset = ~0u; + param->idx[0].rel_addr = NULL; + } + else + { + DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[0]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order < 2) + { + param->idx[1].offset = ~0u; + param->idx[1].rel_addr = NULL; + } + else + { + DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[1]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order < 3) + { + param->idx[2].offset = ~0u; + param->idx[2].rel_addr = 
NULL; + } + else + { + DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[2]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order > 3) + { + WARN("Unhandled order %u.\n", order); + return false; + } + + param->idx_count = order; + + if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) + { + enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; + unsigned int dword_count; + + switch (dimension) + { + case VKD3D_SM4_DIMENSION_SCALAR: + param->immconst_type = VKD3D_IMMCONST_SCALAR; + dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); + if (end - *ptr < dword_count) + { + WARN("Invalid ptr %p, end %p.\n", *ptr, end); + return false; + } + memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); + *ptr += dword_count; + break; + + case VKD3D_SM4_DIMENSION_VEC4: + param->immconst_type = VKD3D_IMMCONST_VEC4; + if (end - *ptr < VKD3D_VEC4_SIZE) + { + WARN("Invalid ptr %p, end %p.\n", *ptr, end); + return false; + } + memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); + *ptr += 4; + break; + + default: + FIXME("Unhandled dimension %#x.\n", dimension); + break; + } + } + else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) + { + /* SM5.1 places a symbol identifier in idx[0] and moves + * other values up one slot. Normalize to SM5.1. 
*/ + param->idx[2] = param->idx[1]; + param->idx[1] = param->idx[0]; + ++param->idx_count; + } + + map_register(priv, param); + + return true; +} + +static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) +{ + switch (reg->type) + { + case VKD3DSPR_COVERAGE: + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_PRIMID: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_OUTSTENCILREF: + return true; + default: + return false; + } +} + +static uint32_t swizzle_from_sm4(uint32_t s) +{ + return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); +} + +static bool register_is_input_output(const struct vkd3d_shader_register *reg) +{ + switch (reg->type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_COLOROUT: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + case VKD3DSPR_PATCHCONST: + return true; + + default: + return false; + } +} + +static bool register_is_control_point_input(const struct vkd3d_shader_register *reg, + const struct vkd3d_shader_sm4_parser *priv) +{ + return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT + || (reg->type == VKD3DSPR_INPUT && (priv->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE + || priv->p.shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); +} + +static unsigned int mask_from_swizzle(unsigned int swizzle) +{ + return (1u << vkd3d_swizzle_get_component(swizzle, 0)) + | (1u << vkd3d_swizzle_get_component(swizzle, 1)) + | (1u << vkd3d_swizzle_get_component(swizzle, 2)) + | (1u << vkd3d_swizzle_get_component(swizzle, 3)); +} + +static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_parser *priv, + const struct vkd3d_shader_register *reg, unsigned int mask) +{ + unsigned int idx_count = 1 + register_is_control_point_input(reg, priv); + const unsigned int *masks; + unsigned 
int register_idx; + + if (reg->idx_count != idx_count) + { + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT, + "Invalid index count %u for register type %#x; expected count %u.", + reg->idx_count, reg->type, idx_count); + return false; + } + + switch (reg->type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + masks = priv->input_register_masks; + break; + case VKD3DSPR_OUTPUT: + masks = sm4_parser_is_in_fork_or_join_phase(priv) ? priv->patch_constant_register_masks + : priv->output_register_masks; + break; + case VKD3DSPR_COLOROUT: + case VKD3DSPR_OUTCONTROLPOINT: + masks = priv->output_register_masks; + break; + case VKD3DSPR_PATCHCONST: + masks = priv->patch_constant_register_masks; + break; + + default: + vkd3d_unreachable(); + } + + register_idx = reg->idx[reg->idx_count - 1].offset; + /* The signature element registers have already been checked against MAX_REG_OUTPUT. */ + if (register_idx >= MAX_REG_OUTPUT || (masks[register_idx] & mask) != mask) + { + WARN("Failed to find signature element for register type %#x, index %u and mask %#x.\n", + reg->type, register_idx, mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER, + "Could not find signature element matching register type %#x, index %u and mask %#x.", + reg->type, register_idx, mask); + return false; + } + + return true; +} + +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) +{ + DWORD token; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = **ptr; + + if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) + { + ERR("Failed to read parameter.\n"); + return false; + } + + if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) + { + 
src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } + else + { + enum vkd3d_sm4_swizzle_type swizzle_type = + (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + + switch (swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: + if (shader_sm4_is_scalar_register(&src_param->reg)) + src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + else + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + break; + + case VKD3D_SM4_SWIZZLE_SCALAR: + src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; + src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); + break; + + default: + FIXME("Unhandled swizzle type %#x.\n", swizzle_type); + break; + } + } + + if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv, + &src_param->reg, mask_from_swizzle(src_param->swizzle))) + return false; + + return true; +} + +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) +{ + enum vkd3d_shader_src_modifier modifier; + DWORD token; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = **ptr; + + if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) + { + ERR("Failed to read parameter.\n"); + return false; + } + + if (modifier != VKD3DSPSM_NONE) + { + ERR("Invalid source modifier %#x on destination register.\n", modifier); + return false; + } + + dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + if (data_type == VKD3D_DATA_DOUBLE) + dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); + /* Scalar registers are declared with no write mask in shader bytecode. 
*/ + if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + dst_param->modifiers = 0; + dst_param->shift = 0; + + if (register_is_input_output(&dst_param->reg) && !shader_sm4_validate_input_output_register(priv, + &dst_param->reg, dst_param->write_mask)) + return false; + + return true; +} + +static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) +{ + enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; + + switch (modifier_type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: + { + static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER + | VKD3D_SM4_MODIFIER_MASK + | VKD3D_SM4_AOFFIMMI_U_MASK + | VKD3D_SM4_AOFFIMMI_V_MASK + | VKD3D_SM4_AOFFIMMI_W_MASK; + + /* Bit fields are used for sign extension. */ + struct + { + int u : 4; + int v : 4; + int w : 4; + } aoffimmi; + + if (modifier & ~recognized_bits) + FIXME("Unhandled instruction modifier %#x.\n", modifier); + + aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; + aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; + aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; + ins->texel_offset.u = aoffimmi.u; + ins->texel_offset.v = aoffimmi.v; + ins->texel_offset.w = aoffimmi.w; + break; + } + + case VKD3D_SM5_MODIFIER_DATA_TYPE: + { + DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; + unsigned int i; + + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + ins->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + ins->resource_data_type[i] = data_type_table[data_type]; + } + } + break; + } + + case 
VKD3D_SM5_MODIFIER_RESOURCE_TYPE: + { + enum vkd3d_sm4_resource_type resource_type + = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; + + if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) + ins->raw = true; + else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) + ins->structured = true; + + if (resource_type < ARRAY_SIZE(resource_type_table)) + ins->resource_type = resource_type_table[resource_type]; + else + { + FIXME("Unhandled resource type %#x.\n", resource_type); + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + + ins->resource_stride + = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; + break; + } + + default: + FIXME("Unhandled instruction modifier %#x.\n", modifier); + } +} + +static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_sm4_opcode_info *opcode_info; + uint32_t opcode_token, opcode, previous_token; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; + const uint32_t **ptr = &sm4->ptr; + unsigned int i, len; + size_t remaining; + const uint32_t *p; + DWORD precise; + + if (*ptr >= sm4->end) + { + WARN("End of byte-code, failed to read opcode.\n"); + goto fail; + } + remaining = sm4->end - *ptr; + + ++sm4->p.location.line; + + opcode_token = *(*ptr)++; + opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; + + len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); + if (!len) + { + if (remaining < 2) + { + WARN("End of byte-code, failed to read length token.\n"); + goto fail; + } + len = **ptr; + } + if (!len || remaining < len) + { + WARN("Read invalid length %u (remaining %zu).\n", len, remaining); + goto fail; + } + --len; + + if (!(opcode_info = get_opcode_info(opcode))) + { + FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); + ins->handler_idx 
= VKD3DSIH_INVALID; + *ptr += len; + return; + } + + ins->handler_idx = opcode_info->handler_idx; + if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE + || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + sm4->phase = ins->handler_idx; + sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; + ins->flags = 0; + ins->coissue = false; + ins->raw = false; + ins->structured = false; + ins->predicate = NULL; + ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); + ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); + ins->src = src_params = shader_parser_get_src_params(&sm4->p, ins->src_count); + if (!src_params && ins->src_count) + { + ERR("Failed to allocate src parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + ins->resource_stride = 0; + ins->resource_data_type[0] = VKD3D_DATA_FLOAT; + ins->resource_data_type[1] = VKD3D_DATA_FLOAT; + ins->resource_data_type[2] = VKD3D_DATA_FLOAT; + ins->resource_data_type[3] = VKD3D_DATA_FLOAT; + memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + + p = *ptr; + *ptr += len; + + if (opcode_info->read_opcode_func) + { + ins->dst = NULL; + ins->dst_count = 0; + opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); + } + else + { + enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; + + previous_token = opcode_token; + while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) + shader_sm4_read_instruction_modifier(previous_token = *p++, ins); + + ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) + { + ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; + instruction_dst_modifier = VKD3DSPDM_SATURATE; + } 
+ precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; + ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; + + ins->dst = dst_params = shader_parser_get_dst_params(&sm4->p, ins->dst_count); + if (!dst_params && ins->dst_count) + { + ERR("Failed to allocate dst parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + for (i = 0; i < ins->dst_count; ++i) + { + if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), + &dst_params[i]))) + { + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + dst_params[i].modifiers |= instruction_dst_modifier; + } + + for (i = 0; i < ins->src_count; ++i) + { + if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), + &src_params[i]))) + { + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + } + } + + return; + +fail: + *ptr = sm4->end; + ins->handler_idx = VKD3DSIH_INVALID; + return; +} + +static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = +{ + .parser_destroy = shader_sm4_destroy, +}; + +static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, + size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_version version; + uint32_t version_token, token_count; + unsigned int i; + + if (byte_code_size / sizeof(*byte_code) < 2) + { + WARN("Invalid byte code size %lu.\n", (long)byte_code_size); + return false; + } + + version_token = byte_code[0]; + TRACE("Version: 0x%08x.\n", version_token); + token_count = byte_code[1]; + TRACE("Token count: %u.\n", token_count); + + if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) + { + WARN("Invalid token count %u.\n", token_count); + return false; + } + + sm4->start = &byte_code[2]; + sm4->end = 
&byte_code[token_count]; + + switch (version_token >> 16) + { + case VKD3D_SM4_PS: + version.type = VKD3D_SHADER_TYPE_PIXEL; + break; + + case VKD3D_SM4_VS: + version.type = VKD3D_SHADER_TYPE_VERTEX; + break; + + case VKD3D_SM4_GS: + version.type = VKD3D_SHADER_TYPE_GEOMETRY; + break; + + case VKD3D_SM5_HS: + version.type = VKD3D_SHADER_TYPE_HULL; + break; + + case VKD3D_SM5_DS: + version.type = VKD3D_SHADER_TYPE_DOMAIN; + break; + + case VKD3D_SM5_CS: + version.type = VKD3D_SHADER_TYPE_COMPUTE; + break; + + default: + FIXME("Unrecognised shader type %#x.\n", version_token >> 16); + } + version.major = VKD3D_SM4_VERSION_MAJOR(version_token); + version.minor = VKD3D_SM4_VERSION_MINOR(version_token); + + /* Estimate instruction count to avoid reallocation in most shaders. */ + if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, + token_count / 7u + 20)) + return false; + sm4->ptr = sm4->start; + + memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); + for (i = 0; i < output_signature->element_count; ++i) + { + struct signature_element *e = &output_signature->elements[i]; + + if (version.type == VKD3D_SHADER_TYPE_PIXEL + && ascii_strcasecmp(e->semantic_name, "SV_Target")) + continue; + if (e->register_index >= ARRAY_SIZE(sm4->output_map)) + { + WARN("Invalid output index %u.\n", e->register_index); + continue; + } + + sm4->output_map[e->register_index] = e->semantic_index; + } + + return true; +} + +static bool shader_sm4_parser_validate_signature(struct vkd3d_shader_sm4_parser *sm4, + const struct shader_signature *signature, unsigned int *masks, const char *name) +{ + unsigned int i, register_idx, register_count, mask; + + for (i = 0; i < signature->element_count; ++i) + { + register_idx = signature->elements[i].register_index; + register_count = signature->elements[i].register_count; + if (register_idx != ~0u && (register_idx >= MAX_REG_OUTPUT || MAX_REG_OUTPUT - register_idx < register_count)) + { + WARN("%s 
signature element %u unhandled register index %u, count %u.\n", + name, i, register_idx, register_count); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS, + "%s signature element %u register index %u, count %u exceeds maximum index of %u.", name, + i, register_idx, register_count, MAX_REG_OUTPUT - 1); + return false; + } + + if (!vkd3d_bitmask_is_contiguous(mask = signature->elements[i].mask)) + { + WARN("%s signature element %u mask %#x is not contiguous.\n", name, i, mask); + vkd3d_shader_parser_warning(&sm4->p, VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS, + "%s signature element %u mask %#x is not contiguous.", name, i, mask); + } + + if (register_idx != ~0u) + masks[register_idx] |= mask; + } + + return true; +} + +static int index_range_compare(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct sm4_index_range)); +} + +static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_sm4_parser *sm4) +{ + if (!sm4->input_index_ranges.count || !sm4->output_index_ranges.count) + return; + + if (sm4->input_index_ranges.count == sm4->output_index_ranges.count) + { + qsort(sm4->input_index_ranges.ranges, sm4->input_index_ranges.count, sizeof(sm4->input_index_ranges.ranges[0]), + index_range_compare); + qsort(sm4->output_index_ranges.ranges, sm4->output_index_ranges.count, sizeof(sm4->output_index_ranges.ranges[0]), + index_range_compare); + if (!memcmp(sm4->input_index_ranges.ranges, sm4->output_index_ranges.ranges, + sm4->input_index_ranges.count * sizeof(sm4->input_index_ranges.ranges[0]))) + return; + } + + /* This is very unlikely to occur and would complicate the default control point phase implementation. 
*/ + WARN("Default phase index ranges are not identical.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Default control point phase input and output index range declarations are not identical."); + return; +} + +int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_instruction_array *instructions; + struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_sm4_parser *sm4; + int ret; + + if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + shader_desc = &sm4->p.shader_desc; + if ((ret = shader_extract_from_dxbc(&compile_info->source, + message_context, compile_info->source_name, shader_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm4); + return ret; + } + + if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, + compile_info->source_name, &shader_desc->output_signature, message_context)) + { + WARN("Failed to initialise shader parser.\n"); + free_shader_desc(shader_desc); + vkd3d_free(sm4); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, + sm4->input_register_masks, "Input") + || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, + sm4->output_register_masks, "Output") + || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, + sm4->patch_constant_register_masks, "Patch constant")) + { + shader_sm4_destroy(&sm4->p); + return VKD3D_ERROR_INVALID_SHADER; + } + + instructions = &sm4->p.instructions; + while (sm4->ptr != sm4->end) + { + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ERR("Failed to allocate 
instructions.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + shader_sm4_destroy(&sm4->p); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ins = &instructions->elements[instructions->count]; + shader_sm4_read_instruction(sm4, ins); + + if (ins->handler_idx == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + shader_sm4_destroy(&sm4->p); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ++instructions->count; + } + if (sm4->p.shader_version.type == VKD3D_SHADER_TYPE_HULL && !sm4->has_control_point_phase && !sm4->p.failed) + shader_sm4_validate_default_phase_index_ranges(sm4); + + *parser = &sm4->p; + + return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + +static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); + +static bool type_is_integer(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return true; + + default: + return false; + } +} + +bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) +{ + unsigned int i; + + static const struct + { + const char *semantic; + bool output; + enum vkd3d_shader_type shader_type; + enum vkd3d_sm4_swizzle_type swizzle_type; + enum vkd3d_sm4_register_type type; + bool has_idx; + } + register_table[] = + { + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, + + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, + + /* 
Put sv_target in this table, instead of letting it fall through to + * default varying allocation, so that the register index matches the + * usage index. */ + {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { + if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type) + { + *type = register_table[i].type; + if (swizzle_type) + *swizzle_type = register_table[i].swizzle_type; + *has_idx = register_table[i].has_idx; + return true; + } + } + + return false; +} + +bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3D_NAME *usage) +{ + unsigned int i; + + static const struct + { + const char *name; + bool output; + enum vkd3d_shader_type shader_type; + D3DDECLUSAGE usage; + } + semantics[] = + { + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + + {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, + + {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, + + {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, + {"sv_position", false, 
VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, + {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + + {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, + {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, + + {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, + }; + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { + if (!ascii_strcasecmp(semantic->name, semantics[i].name) + && output == semantics[i].output + && ctx->profile->type == semantics[i].shader_type + && !ascii_strncasecmp(semantic->name, "sv_", 3)) + { + *usage = semantics[i].usage; + return true; + } + } + + if (!ascii_strncasecmp(semantic->name, "sv_", 3)) + return false; + + *usage = D3D_NAME_UNDEFINED; + return true; +} + +static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) +{ + /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN + * sections to be aligned. Without this, the sections themselves will be + * aligned, but their reported sizes won't. 
*/ + size_t size = bytecode_align(buffer); + + dxbc_writer_add_section(dxbc, tag, buffer->data, size); +} + +static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + struct vkd3d_string_buffer *string; + const struct hlsl_ir_var *var; + size_t count_position; + unsigned int i; + bool ret; + + count_position = put_u32(&buffer, 0); + put_u32(&buffer, 8); /* unknown */ + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; + enum vkd3d_sm4_register_type type; + uint32_t usage_idx, reg_idx; + D3D_NAME usage; + bool has_idx; + + if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) + continue; + + ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + assert(ret); + if (usage == ~0u) + continue; + usage_idx = var->semantic.index; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) + { + reg_idx = has_idx ? var->semantic.index : ~0u; + } + else + { + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); + type = VKD3D_SM4_RT_INPUT; + reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; + } + + use_mask = width; /* FIXME: accurately report use mask */ + if (output) + use_mask = 0xf ^ use_mask; + + /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ + if (usage >= 64) + usage = 0; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, usage_idx); + put_u32(&buffer, usage); + switch (var->data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); + break; + + case HLSL_TYPE_INT: + put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); + break; + + default: + if ((string = hlsl_type_to_string(ctx, var->data_type))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Invalid data type %s for semantic variable %s.", string->buffer, var->name); + hlsl_release_string_buffer(ctx, string); + put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); + } + put_u32(&buffer, reg_idx); + put_u32(&buffer, vkd3d_make_u16(width, use_mask)); + } + + i = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + const char *semantic = var->semantic.name; + size_t string_offset; + D3D_NAME usage; + + if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) + continue; + + hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + if (usage == ~0u) + continue; + + if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) + string_offset = put_string(&buffer, "SV_Target"); + else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) + string_offset = put_string(&buffer, "SV_Depth"); + else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) + string_offset = put_string(&buffer, "SV_Position"); + else + string_offset = put_string(&buffer, semantic); + set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); + } + + set_u32(&buffer, count_position, i); + + add_section(dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); +} + +static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_ARRAY: + return sm4_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3D_SVC_MATRIX_COLUMNS; + else + return D3D_SVC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3D_SVC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3D_SVC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3D_SVC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3D_SVC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->class); + vkd3d_unreachable(); + } +} + +static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3D_SVT_BOOL; + case HLSL_TYPE_DOUBLE: + return D3D_SVT_DOUBLE; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3D_SVT_FLOAT; + case HLSL_TYPE_INT: + return D3D_SVT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3D_SVT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SVT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SVT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SVT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SVT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3D_SVT_SAMPLER; + default: + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_STRING: + return D3D_SVT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SVT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SVT_TEXTURE2D; + case HLSL_SAMPLER_DIM_2DMS: + return D3D_SVT_TEXTURE2DMS; + case HLSL_SAMPLER_DIM_3D: + return D3D_SVT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SVT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3D_SVT_TEXTURE; + default: + vkd3d_unreachable(); + } + break; + case 
HLSL_TYPE_UINT: + return D3D_SVT_UINT; + case HLSL_TYPE_VERTEXSHADER: + return D3D_SVT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3D_SVT_VOID; + default: + vkd3d_unreachable(); + } +} + +static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) +{ + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + const char *name = array_type->name ? array_type->name : "<unnamed>"; + const struct hlsl_profile_info *profile = ctx->profile; + unsigned int field_count = 0, array_size = 0; + size_t fields_offset = 0, name_offset = 0; + size_t i; + + if (type->bytecode_offset) + return; + + if (profile->major_version >= 5) + name_offset = put_string(buffer, name); + + if (type->class == HLSL_CLASS_ARRAY) + array_size = hlsl_get_multiarray_size(type); + + if (array_type->class == HLSL_CLASS_STRUCT) + { + field_count = array_type->e.record.field_count; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm4_type(ctx, buffer, field->type); + } + + fields_offset = bytecode_align(buffer); + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + put_u32(buffer, field->name_bytecode_offset); + put_u32(buffer, field->type->bytecode_offset); + put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); + } + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); + + if (profile->major_version >= 5) + { + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, name_offset); + } +} + +static 
D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_type(type->e.array.type); + + switch (type->base_type) + { + case HLSL_TYPE_SAMPLER: + return D3D_SIT_SAMPLER; + case HLSL_TYPE_TEXTURE: + return D3D_SIT_TEXTURE; + case HLSL_TYPE_UAV: + return D3D_SIT_UAV_RWTYPED; + default: + vkd3d_unreachable(); + } +} + +static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_format(type->e.array.type); + + switch (type->e.resource_format->base_type) + { + case HLSL_TYPE_DOUBLE: + return D3D_RETURN_TYPE_DOUBLE; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3D_RETURN_TYPE_FLOAT; + + case HLSL_TYPE_INT: + return D3D_RETURN_TYPE_SINT; + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + return D3D_RETURN_TYPE_UINT; + + default: + vkd3d_unreachable(); + } +} + +static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return sm4_rdef_resource_dimension(type->e.array.type); + + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SRV_DIMENSION_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SRV_DIMENSION_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SRV_DIMENSION_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SRV_DIMENSION_TEXTURECUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return D3D_SRV_DIMENSION_TEXTURE1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return D3D_SRV_DIMENSION_TEXTURE2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return D3D_SRV_DIMENSION_TEXTURE2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return D3D_SRV_DIMENSION_BUFFER; + default: + vkd3d_unreachable(); + } +} + +static int 
sm4_compare_extern_resources(const void *a, const void *b) +{ + const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; + const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; + enum hlsl_regset aa_regset, bb_regset; + + aa_regset = hlsl_type_get_regset(aa->data_type); + bb_regset = hlsl_type_get_regset(bb->data_type); + + if (aa_regset != bb_regset) + return aa_regset - bb_regset; + + return aa->regs[aa_regset].id - bb->regs[bb_regset].id; +} + +static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +{ + const struct hlsl_ir_var **extern_resources = NULL; + const struct hlsl_ir_var *var; + enum hlsl_regset regset; + size_t capacity = 0; + + *count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + if (!var->regs[regset].allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + *count = 0; + return NULL; + } + + extern_resources[*count] = var; + ++*count; + } + + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); + return extern_resources; +} + +static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +{ + unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + size_t cbuffer_position, resource_position, creator_position; + const struct hlsl_profile_info *profile = ctx->profile; + const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + + static const uint16_t target_types[] = + { + 0xffff, /* PIXEL */ + 0xfffe, /* VERTEX */ + 0x4753, /* GEOMETRY */ + 0x4853, /* HULL */ + 0x4453, /* DOMAIN */ + 0x4353, /* 
COMPUTE */ + }; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + resource_count += extern_resources_count; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + { + ++cbuffer_count; + ++resource_count; + } + } + + put_u32(&buffer, cbuffer_count); + cbuffer_position = put_u32(&buffer, 0); + put_u32(&buffer, resource_count); + resource_position = put_u32(&buffer, 0); + put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), + target_types[profile->type])); + put_u32(&buffer, 0); /* FIXME: compilation flags */ + creator_position = put_u32(&buffer, 0); + + if (profile->major_version >= 5) + { + put_u32(&buffer, TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ + put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ + put_u32(&buffer, 0); /* unknown; possibly a null terminator */ + } + + /* Bound resources. 
*/ + + resources_offset = bytecode_align(&buffer); + set_u32(&buffer, resource_position, resources_offset); + + for (i = 0; i < extern_resources_count; ++i) + { + enum hlsl_regset regset; + uint32_t flags = 0; + + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type); + + if (var->reg_reservation.reg_type) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, sm4_resource_type(var->data_type)); + if (regset == HLSL_REGSET_SAMPLERS) + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); + put_u32(&buffer, 0); + } + else + { + unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + + put_u32(&buffer, sm4_resource_format(var->data_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } + put_u32(&buffer, var->regs[regset].id); + put_u32(&buffer, var->regs[regset].bind_count); + put_u32(&buffer, flags); + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + uint32_t flags = 0; + + if (!cbuffer->reg.allocated) + continue; + + if (cbuffer->reservation.reg_type) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); + put_u32(&buffer, 0); /* return type */ + put_u32(&buffer, 0); /* dimension */ + put_u32(&buffer, 0); /* multisample count */ + put_u32(&buffer, cbuffer->reg.id); /* bind point */ + put_u32(&buffer, 1); /* bind count */ + put_u32(&buffer, flags); /* flags */ + } + + for (i = 0; i < extern_resources_count; ++i) + { + var = extern_resources[i]; + + string_offset = put_string(&buffer, var->name); + set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); + } + + /* Buffers. */ + + cbuffers_offset = bytecode_align(&buffer); + set_u32(&buffer, cbuffer_position, cbuffers_offset); + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + unsigned int var_count = 0; + + if (!cbuffer->reg.allocated) + continue; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + ++var_count; + } + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var_count); + put_u32(&buffer, 0); /* variable offset */ + put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); + put_u32(&buffer, 0); /* FIXME: flags */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + size_t vars_start = bytecode_align(&buffer); + + if (!cbuffer->reg.allocated) + continue; + + set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + { + uint32_t flags = 0; + + if (var->last_read) + flags |= D3D_SVF_USED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var->buffer_offset * sizeof(float)); + put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); + put_u32(&buffer, flags); + put_u32(&buffer, 0); /* type */ + put_u32(&buffer, 0); /* FIXME: default value */ + + if (profile->major_version >= 5) + { + put_u32(&buffer, 0); /* texture start */ + put_u32(&buffer, 0); /* texture count */ + put_u32(&buffer, 0); /* sampler start */ + put_u32(&buffer, 0); /* sampler count */ + } + } + } + + j = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + { + const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); + size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); + size_t string_offset = put_string(&buffer, var->name); + + set_u32(&buffer, var_offset, string_offset); + write_sm4_type(ctx, &buffer, var->data_type); + set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); + ++j; + } + } + } + + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); + + add_section(dxbc, TAG_RDEF, &buffer); + + vkd3d_free(extern_resources); +} + +static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) +{ + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return VKD3D_SM4_RESOURCE_TEXTURE_1D; + case HLSL_SAMPLER_DIM_2D: + return VKD3D_SM4_RESOURCE_TEXTURE_2D; + case HLSL_SAMPLER_DIM_3D: + return VKD3D_SM4_RESOURCE_TEXTURE_3D; + case HLSL_SAMPLER_DIM_CUBE: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return VKD3D_SM4_RESOURCE_BUFFER; + default: + vkd3d_unreachable(); + } +} + +struct sm4_instruction_modifier +{ + enum vkd3d_sm4_instruction_modifier type; + + union + { + struct + { + int u, v, w; + } aoffimmi; + } u; +}; + +static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) +{ + uint32_t word = 0; + + word |= VKD3D_SM4_MODIFIER_MASK & imod->type; + + switch (imod->type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: + assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); + assert(-8 <= imod->u.aoffimmi.v && 
imod->u.aoffimmi.v <= 7); + assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); + word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; + break; + + default: + vkd3d_unreachable(); + } + + return word; +} + +struct sm4_register +{ + enum vkd3d_sm4_register_type type; + uint32_t idx[2]; + unsigned int idx_count; + enum vkd3d_sm4_dimension dim; + uint32_t immconst_uint[4]; + unsigned int mod; +}; + +struct sm4_instruction +{ + enum vkd3d_sm4_opcode opcode; + + struct sm4_instruction_modifier modifiers[1]; + unsigned int modifier_count; + + struct sm4_dst_register + { + struct sm4_register reg; + unsigned int writemask; + } dsts[2]; + unsigned int dst_count; + + struct sm4_src_register + { + struct sm4_register reg; + enum vkd3d_sm4_swizzle_type swizzle_type; + unsigned int swizzle; + } srcs[5]; + unsigned int src_count; + + unsigned int byte_stride; + + uint32_t idx[3]; + unsigned int idx_count; +}; + +static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, + unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, + const struct hlsl_deref *deref, const struct hlsl_type *data_type) +{ + const struct hlsl_ir_var *var = deref->var; + + if (var->is_uniform) + { + enum hlsl_regset regset = hlsl_type_get_regset(data_type); + + if (regset == HLSL_REGSET_TEXTURES) + { + reg->type = VKD3D_SM4_RT_RESOURCE; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_TEXTURES); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { + reg->type = VKD3D_SM5_RT_UAV; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if 
(swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_UAVS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { + reg->type = VKD3D_SM4_RT_SAMPLER; + reg->dim = VKD3D_SM4_DIMENSION_NONE; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + assert(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3D_SM4_RT_CONSTBUFFER; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->buffer->reg.id; + reg->idx[1] = offset / 4; + reg->idx_count = 2; + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } + } + else if (var->is_input_semantic) + { + bool has_idx; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, swizzle_type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0] = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_INPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else if (var->is_output_semantic) + { + bool has_idx; + + if
(hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, swizzle_type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0] = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + if (reg->type == VKD3D_SM4_RT_DEPTHOUT) + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + else + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_OUTPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + reg->idx[0] = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +} + +static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, + const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) +{ + unsigned int writemask; + + sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + +static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, + enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) +{ + assert(instr->reg.allocated); + reg->type = VKD3D_SM4_RT_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = instr->reg.id; + reg->idx_count = 1; + *writemask = instr->reg.writemask; +} + +static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct
hlsl_ir_node *instr) +{ + unsigned int swizzle_type; + + sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr); +} + +static void sm4_src_from_constant_value(struct sm4_src_register *src, + const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) +{ + src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + src->reg.type = VKD3D_SM4_RT_IMMCONST; + if (width == 1) + { + src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + src->reg.immconst_uint[0] = value->u[0].u; + } + else + { + unsigned int i, j = 0; + + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + for (i = 0; i < 4; ++i) + { + if (map_writemask & (1u << i)) + src->reg.immconst_uint[i] = value->u[j++].u; + } + } +} + +static void sm4_src_from_node(struct sm4_src_register *src, + const struct hlsl_ir_node *instr, unsigned int map_writemask) +{ + unsigned int writemask; + + if (instr->type == HLSL_IR_CONSTANT) + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + + sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); + return; + } + + sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + +static uint32_t sm4_encode_register(const struct sm4_register *reg) +{ + return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) + | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) + | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); +} + +static uint32_t sm4_register_order(const struct sm4_register *reg) +{ + uint32_t order = 1; + if (reg->type == VKD3D_SM4_RT_IMMCONST) + order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 
4 : 1; + order += reg->idx_count; + if (reg->mod) + ++order; + return order; +} + +static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int size = 1, i, j; + + size += instr->modifier_count; + for (i = 0; i < instr->dst_count; ++i) + size += sm4_register_order(&instr->dsts[i].reg); + for (i = 0; i < instr->src_count; ++i) + size += sm4_register_order(&instr->srcs[i].reg); + size += instr->idx_count; + if (instr->byte_stride) + ++size; + + token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); + + if (instr->modifier_count > 0) + token |= VKD3D_SM4_INSTRUCTION_MODIFIER; + put_u32(buffer, token); + + for (i = 0; i < instr->modifier_count; ++i) + { + token = sm4_encode_instruction_modifier(&instr->modifiers[i]); + if (instr->modifier_count > i + 1) + token |= VKD3D_SM4_INSTRUCTION_MODIFIER; + put_u32(buffer, token); + } + + for (i = 0; i < instr->dst_count; ++i) + { + token = sm4_encode_register(&instr->dsts[i].reg); + if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) + token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; + put_u32(buffer, token); + + for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) + put_u32(buffer, instr->dsts[i].reg.idx[j]); + } + + for (i = 0; i < instr->src_count; ++i) + { + token = sm4_encode_register(&instr->srcs[i].reg); + token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; + if (instr->srcs[i].reg.mod) + token |= VKD3D_SM4_EXTENDED_OPERAND; + put_u32(buffer, token); + + if (instr->srcs[i].reg.mod) + put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); + + for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) + put_u32(buffer, instr->srcs[i].reg.idx[j]); + + if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) + { + put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); + 
if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) + { + put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); + put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); + put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); + } + } + } + + if (instr->byte_stride) + put_u32(buffer, instr->byte_stride); + + for (j = 0; j < instr->idx_count; ++j) + put_u32(buffer, instr->idx[j]); +} + +static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + const struct hlsl_ir_node *texel_offset) +{ + struct sm4_instruction_modifier modif; + struct hlsl_ir_constant *offset; + + if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) + return false; + offset = hlsl_ir_constant(texel_offset); + + modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; + modif.u.aoffimmi.u = offset->value.u[0].i; + modif.u.aoffimmi.v = 0; + modif.u.aoffimmi.w = 0; + if (offset->node.data_type->dimx > 1) + modif.u.aoffimmi.v = offset->value.u[1].i; + if (offset->node.data_type->dimx > 2) + modif.u.aoffimmi.w = offset->value.u[2].i; + if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 + || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 + || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) + return false; + + instr->modifiers[instr->modifier_count++] = modif; + return true; +} + +static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) +{ + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, + .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, + .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, + .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, + .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), + .src_count = 1, + }; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + unsigned int i, count = 
var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + + if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; + + for (i = 0; i < count; ++i) + { + if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + + instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm4_instruction(buffer, &instr); + } +} + +static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool uav) +{ + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + unsigned int i, count = var->data_type->reg_size[regset]; + struct hlsl_type *component_type; + struct sm4_instruction instr; + + component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + + for (i = 0; i < count; ++i) + { + if (!var->objects_usage[regset][i].used) + continue; + + instr = (struct sm4_instruction) + { + .dsts[0].reg.type = uav ? 
VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, + .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + + .idx[0] = sm4_resource_format(component_type) * 0x1111, + .idx_count = 1, + }; + + if (uav) + { + switch (var->data_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; + instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; + break; + default: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; + break; + } + } + else + { + instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; + } + instr.opcode |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); + + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + write_sm4_instruction(buffer, &instr); + } +} + +static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + const struct hlsl_profile_info *profile = ctx->profile; + const bool output = var->is_output_semantic; + D3D_NAME usage; + bool has_idx; + + struct sm4_instruction instr = + { + .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, + .dst_count = 1, + }; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + { + if (has_idx) + { + instr.dsts[0].reg.idx[0] = var->semantic.index; + instr.dsts[0].reg.idx_count = 1; + } + else + { + instr.dsts[0].reg.idx_count = 0; + } + instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; + } + else + { + instr.dsts[0].reg.type = output ? 
VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; + instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.idx_count = 1; + instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + + if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) + instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + + hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + if (usage == ~0u) + usage = D3D_NAME_UNDEFINED; + + if (var->is_input_semantic) + { + switch (usage) + { + case D3D_NAME_UNDEFINED: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; + break; + + case D3D_NAME_INSTANCE_ID: + case D3D_NAME_PRIMITIVE_ID: + case D3D_NAME_VERTEX_ID: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; + break; + + default: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; + break; + } + + if (profile->type == VKD3D_SHADER_TYPE_PIXEL) + { + enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; + + if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) + mode = VKD3DSIM_CONSTANT; + + instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + } + } + else + { + if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; + else + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; + } + + switch (usage) + { + case D3D_NAME_COVERAGE: + case D3D_NAME_DEPTH: + case D3D_NAME_DEPTH_GREATER_EQUAL: + case D3D_NAME_DEPTH_LESS_EQUAL: + case D3D_NAME_TARGET: + case D3D_NAME_UNDEFINED: + break; + + default: + instr.idx_count = 1; + instr.idx[0] = usage; + break; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) +{ + struct sm4_instruction instr = + { + .opcode 
= VKD3D_SM4_OP_DCL_TEMPS, + + .idx = {temp_count}, + .idx_count = 1, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, + + .idx = {thread_count[0], thread_count[1], thread_count[2]}, + .idx_count = 3, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_RET, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); + instr.srcs[0].reg.mod = src_mod; + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); + instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; + + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, 
enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +/* dp# instructions don't map the swizzle. */ +static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); + instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + 
+static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) +{ + bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + if (uav) + instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; + else + instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } + } + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + if (!uav) + { + /* Mipmap level is in the last component in the IR, but needs to be in the W + * component in the instruction. 
*/ + unsigned int dim_count = hlsl_sampler_dim_count(dim); + + if (dim_count == 1) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; + if (dim_count == 2) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; + } + + sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); + + sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + + instr.src_count = 2; + + if (multisampled) + { + if (sample_index->type == HLSL_IR_CONSTANT) + { + struct sm4_register *reg = &instr.srcs[2].reg; + struct hlsl_ir_constant *index; + + index = hlsl_ir_constant(sample_index); + + memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); + instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + reg->type = VKD3D_SM4_RT_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = index->value.u[0].u; + } + else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) + { + hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + } + else + { + sm4_src_from_node(&instr.srcs[2], sample_index, 0); + } + + ++instr.src_count; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + switch (load->load_type) + { + case HLSL_RESOURCE_SAMPLE: + instr.opcode = VKD3D_SM4_OP_SAMPLE; + break; + + case HLSL_RESOURCE_SAMPLE_CMP: + instr.opcode = VKD3D_SM4_OP_SAMPLE_C; + break; + + case 
HLSL_RESOURCE_SAMPLE_CMP_LZ: + instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; + break; + + case HLSL_RESOURCE_SAMPLE_LOD: + instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; + break; + + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + instr.opcode = VKD3D_SM4_OP_SAMPLE_B; + break; + + case HLSL_RESOURCE_SAMPLE_GRAD: + instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; + break; + + default: + vkd3d_unreachable(); + } + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } + } + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 3; + + if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD + || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) + { + sm4_src_from_node(&instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); + ++instr.src_count; + } + else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) + { + sm4_src_from_node(&instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); + instr.src_count += 2; + } + else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP + || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) + { + sm4_src_from_node(&instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); + ++instr.src_count; + } + + write_sm4_instruction(buffer, &instr); +} + +static bool type_is_float(const struct hlsl_type *type) +{ + return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; +} + +static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, + const 
struct hlsl_ir_node *arg, uint32_t mask) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_AND; + + sm4_dst_from_node(&instr.dsts[0], &expr->node); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); + instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; + instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + instr.srcs[1].reg.immconst_uint[0] = mask; + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_cast(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +{ + static const union + { + uint32_t u; + float f; + } one = { .f = 1.0 }; + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_type *dst_type = expr->node.data_type; + const struct hlsl_type *src_type = arg1->data_type; + + /* Narrowing casts were already lowered. */ + assert(src_type->dimx == dst_type->dimx); + + switch (dst_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_INT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, 
VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_UINT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); + break; + + case HLSL_TYPE_BOOL: + /* Casts to bool should have already been lowered. 
*/ + default: + vkd3d_unreachable(); + } +} + +static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_expr(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +{ + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; + const struct hlsl_type *dst_type = expr->node.data_type; + struct vkd3d_string_buffer *dst_type_string; + + assert(expr->node.reg.allocated); + + if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) + return; + + switch (expr->op) + { + case HLSL_OP1_ABS: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_BIT_NOT: + assert(type_is_integer(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_CAST: + write_sm4_cast(ctx, buffer, expr); + break; + + case HLSL_OP1_COS: + assert(type_is_float(dst_type)); + write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + break; + + case HLSL_OP1_DSX: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, 
&expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + + case HLSL_OP1_EXP2: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FLOOR: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FRACT: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOG2: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOGIC_NOT: + assert(dst_type->base_type == HLSL_TYPE_BOOL); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_NEG: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_REINTERPRET: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_OP1_ROUND: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_RSQ: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); + break; + + case HLSL_OP1_SAT: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV + | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), + &expr->node, arg1, 0); + break; + + case HLSL_OP1_SIN: + assert(type_is_float(dst_type)); + 
write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + break; + + case HLSL_OP1_SQRT: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_TRUNC: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); + break; + + case HLSL_OP2_ADD: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_BIT_AND: + assert(type_is_integer(dst_type)); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_OR: + assert(type_is_integer(dst_type)); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: + assert(type_is_integer(dst_type)); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_DIV: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: + write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_DOT: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + switch (arg1->data_type->dimx) + { + case 4: + write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); + break; + + case 3: + write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); + break; + + case 2: + 
write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); + break; + + case 1: + default: + vkd3d_unreachable(); + } + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_EQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between "%s" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_GEQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between "%s" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_LESS: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, 
&expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between "%s" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_LOGIC_AND: + assert(dst_type->base_type == HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LOGIC_OR: + assert(dst_type->base_type == HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MAX: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_MIN: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_MOD: + switch (dst_type->base_type) + { + case HLSL_TYPE_UINT: + write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); + } + 
break; + + case HLSL_OP2_MUL: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + /* Using IMUL instead of UMUL because we're taking the low + * bits, and the native compiler generates IMUL. */ + write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_NEQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between "%s" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_RSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + } + + hlsl_release_string_buffer(ctx, dst_type_string); +} + +static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, + .src_count = 1, + }; + + assert(iff->condition.node->data_type->dimx == 1); + + sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &iff->else_block); + } + + instr.opcode = VKD3D_SM4_OP_ENDIF; + instr.src_count = 0; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_jump(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) +{ + struct sm4_instruction instr = {0}; + + switch (jump->type) + { + case HLSL_IR_JUMP_BREAK: + instr.opcode = VKD3D_SM4_OP_BREAK; + break; + + case HLSL_IR_JUMP_DISCARD: + { + struct sm4_register *reg = &instr.srcs[0].reg; + + instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); + instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.src_count = 1; + reg->type = VKD3D_SM4_RT_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = ~0u; + + break; + } + + case HLSL_IR_JUMP_RETURN: + vkd3d_unreachable(); + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + return; + } + + write_sm4_instruction(buffer, &instr); +} + +/* Does this variable's data come directly from the API user, rather than being + * 
temporary or from a previous shader stage? + * I.e. is it a uniform or VS input? */ +static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +{ + if (var->is_uniform) + return true; + + return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; +} + +static void write_sm4_load(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) +{ + const struct hlsl_type *type = load->node.data_type; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + + sm4_dst_from_node(&instr.dsts[0], &load->node); + instr.dst_count = 1; + + assert(type->class <= HLSL_CLASS_LAST_NUMERIC); + if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) + { + struct hlsl_constant_value value; + + /* Uniform bools can be specified as anything, but internal bools always + * have 0 for false and ~0 for true. Normalize that here. */ + + instr.opcode = VKD3D_SM4_OP_MOVC; + + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + + memset(&value, 0xff, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); + memset(&value, 0, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].writemask); + instr.src_count = 3; + } + else + { + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + instr.src_count = 1; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_loop(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_LOOP, + }; + + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &loop->body); + + instr.opcode = VKD3D_SM4_OP_ENDLOOP; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_gather(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) +{ + struct sm4_src_register *src; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + + instr.opcode = VKD3D_SM4_OP_GATHER4; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + if (ctx->profile->major_version < 5) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return; + } + instr.opcode = VKD3D_SM5_OP_GATHER4_PO; + sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); + } + } + + sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); + + src = &instr.srcs[instr.src_count++]; + sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; + src->swizzle = swizzle; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_resource_load(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *sample_index = load->sample_index.node; + const struct hlsl_ir_node *coords = load->coords.node; + + if (!hlsl_type_is_resource(resource_type)) + { + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + return; + } + + 
if (load->sampler.var) + { + const struct hlsl_type *sampler_type = load->sampler.var->data_type; + + if (!hlsl_type_is_resource(sampler_type)) + { + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + return; + } + + if (!load->sampler.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return; + } + } + + if (!load->resource.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + return; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, + coords, sample_index, texel_offset, load->sampling_dim); + break; + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: + if (!load->sampler.var) + { + hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); + return; + } + write_sm4_sample(ctx, buffer, load); + break; + + case HLSL_RESOURCE_GATHER_RED: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_GREEN: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_BLUE: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_ALPHA: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); + break; + } +} + +static void write_sm4_resource_store(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store 
*store) +{ + const struct hlsl_type *resource_type = store->resource.var->data_type; + + if (!hlsl_type_is_resource(resource_type)) + { + hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); + return; + } + + if (!store->resource.var->is_uniform) + { + hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + return; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); + return; + } + + write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); +} + +static void write_sm4_store(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) +{ + const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; + unsigned int writemask; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); + instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_swizzle(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) +{ + struct sm4_instruction instr; + unsigned int writemask; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_dst_from_node(&instr.dsts[0], &swizzle->node); + instr.dst_count = 1; + + sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); + instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), + swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); + instr.src_count = 1; + + 
write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_block *block) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { + hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); + break; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy."); + break; + } + + assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + + if (!instr->reg.allocated) + { + assert(instr->type == HLSL_IR_CONSTANT); + continue; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + case HLSL_IR_CONSTANT: + vkd3d_unreachable(); + + case HLSL_IR_EXPR: + write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: + write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: + write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: + write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: + write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_LOOP: + write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_STORE: + write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: + write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } +} + +static void write_sm4_shdr(struct hlsl_ctx *ctx, + const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) +{ + const struct hlsl_profile_info *profile = 
ctx->profile; + const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; + unsigned int extern_resources_count, i; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + size_t token_count_position; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { + VKD3D_SM4_PS, + VKD3D_SM4_VS, + VKD3D_SM4_GS, + VKD3D_SM5_HS, + VKD3D_SM5_DS, + VKD3D_SM5_CS, + 0, /* EFFECT */ + 0, /* TEXTURE */ + VKD3D_SM4_LIB, + }; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); + token_count_position = put_u32(&buffer, 0); + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + write_sm4_dcl_constant_buffer(&buffer, cbuffer); + } + + for (i = 0; i < extern_resources_count; ++i) + { + enum hlsl_regset regset; + + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type); + + if (regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&buffer, var); + else if (regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(ctx, &buffer, var, false); + else if (regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(ctx, &buffer, var, true); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) + write_sm4_dcl_semantic(ctx, &buffer, var); + } + + if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) + write_sm4_dcl_thread_group(&buffer, ctx->thread_count); + + if (ctx->temp_count) + write_sm4_dcl_temps(&buffer, ctx->temp_count); + + write_sm4_block(ctx, &buffer, &entry_func->body); + + write_sm4_ret(&buffer); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + + add_section(dxbc, TAG_SHDR, &buffer); + + vkd3d_free(extern_resources); +} + +int 
hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct dxbc_writer dxbc; + size_t i; + int ret; + + dxbc_writer_init(&dxbc); + + write_sm4_signature(ctx, &dxbc, false); + write_sm4_signature(ctx, &dxbc, true); + write_sm4_rdef(ctx, &dxbc); + write_sm4_shdr(ctx, entry_func, &dxbc); + + if (!(ret = ctx->result)) + ret = dxbc_writer_write(&dxbc, out); + for (i = 0; i < dxbc.section_count; ++i) + vkd3d_shader_free_shader_code(&dxbc.sections[i].data); + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index c9c15f01155..b8f36df07f1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -338,22 +338,35 @@ void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const stru va_end(args); }
+size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer) +{ + size_t aligned_size = align(buffer->size, 4); + + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, aligned_size, 1)) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return aligned_size; + } + + memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size); + buffer->size = aligned_size; + return aligned_size; +} + size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) { - size_t aligned_size = align(size, 4); - size_t offset = buffer->size; + size_t offset = bytecode_align(buffer);
if (buffer->status) return offset;
- if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + aligned_size, 1)) + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + size, 1)) { buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; return offset; } memcpy(buffer->data + offset, bytes, size); - memset(buffer->data + offset + size, 0xab, aligned_size - size); - buffer->size = offset + aligned_size; + buffer->size = offset + size; return offset; }
@@ -1070,7 +1083,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info
if (TRACE_ON()) { - vkd3d_shader_trace(parser); + vkd3d_shader_trace(&parser->instructions, &parser->shader_version); }
for (i = 0; i < parser->instructions.count; ++i) @@ -1167,75 +1180,73 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char return ret; }
-static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, +static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; + struct vkd3d_glsl_generator *glsl_generator; struct vkd3d_shader_compile_info scan_info; - struct spirv_compiler *spirv_compiler; - struct vkd3d_shader_parser *parser; int ret;
+ vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + scan_info = *compile_info; scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; scan_descriptor_info.next = scan_info.next; scan_info.next = &scan_descriptor_info;
- if ((ret = scan_dxbc(&scan_info, message_context)) < 0) + if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) return ret;
- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) + switch (compile_info->target_type) { - WARN("Failed to initialise shader parser.\n"); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return ret; - } + case VKD3D_SHADER_TARGET_D3D_ASM: + ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); + break;
- vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + case VKD3D_SHADER_TARGET_GLSL: + if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, + message_context, &parser->location))) + { + ERR("Failed to create GLSL generator.\n"); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return VKD3D_ERROR; + }
- if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) - { - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); - vkd3d_shader_parser_destroy(parser); - return ret; - } + ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); + vkd3d_glsl_generator_destroy(glsl_generator); + break;
- if (compile_info->target_type == VKD3D_SHADER_TARGET_GLSL) - { - struct vkd3d_glsl_generator *glsl_generator; + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + case VKD3D_SHADER_TARGET_SPIRV_TEXT: + ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); + break;
- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, - message_context, &parser->location))) - { - ERR("Failed to create GLSL generator.\n"); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return VKD3D_ERROR; - } + default: + /* Validation should prevent us from reaching this. */ + assert(0); + }
- ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return ret; +}
- vkd3d_glsl_generator_destroy(glsl_generator); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return ret; - } +static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret;
- if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, - compile_info, &scan_descriptor_info, message_context, &parser->location))) + if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) { - ERR("Failed to create DXBC compiler.\n"); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return VKD3D_ERROR; + WARN("Failed to initialise shader parser.\n"); + return ret; }
- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); + ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context);
- spirv_compiler_destroy(spirv_compiler); vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); return ret; }
@@ -1270,7 +1281,7 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_
if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) { - ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); + ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); vkd3d_shader_parser_destroy(parser); return ret; } @@ -1388,10 +1399,54 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu desc->version = 0; }
+static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, + const struct shader_signature *src) +{ + unsigned int i; + + signature->element_count = src->element_count; + if (!src->elements) + { + assert(!signature->element_count); + signature->elements = NULL; + return true; + } + + if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) + return false; + + for (i = 0; i < signature->element_count; ++i) + { + struct vkd3d_shader_signature_element *d = &signature->elements[i]; + struct signature_element *e = &src->elements[i]; + + d->semantic_name = e->semantic_name; + d->semantic_index = e->semantic_index; + d->stream_index = e->stream_index; + d->sysval_semantic = e->sysval_semantic; + d->component_type = e->component_type; + d->register_index = e->register_index; + if (e->register_count > 1) + FIXME("Arrayed elements are not supported yet.\n"); + d->mask = e->mask; + d->used_mask = e->used_mask; + d->min_precision = e->min_precision; + } + + return true; +} + +void shader_signature_cleanup(struct shader_signature *signature) +{ + vkd3d_free(signature->elements); + signature->elements = NULL; +} + int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_signature *signature, char **messages) { struct vkd3d_shader_message_context message_context; + struct shader_signature shader_signature; int ret;
TRACE("dxbc {%p, %zu}, signature %p, messages %p.\n", dxbc->code, dxbc->size, signature, messages); @@ -1400,13 +1455,17 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, *messages = NULL; vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO);
- ret = shader_parse_input_signature(dxbc, &message_context, signature); + ret = shader_parse_input_signature(dxbc, &message_context, &shader_signature); vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) ret = VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_message_context_cleanup(&message_context);
+ if (!vkd3d_shader_signature_from_shader_signature(signature, &shader_signature)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + + shader_signature_cleanup(&shader_signature); return ret; }
@@ -1642,6 +1701,84 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins return true; }
+static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, + unsigned int count); + +static bool shader_register_clone_relative_addresses(struct vkd3d_shader_register *reg, + struct vkd3d_shader_instruction_array *instructions) +{ + unsigned int i; + + for (i = 0; i < reg->idx_count; ++i) + { + if (!reg->idx[i].rel_addr) + continue; + + if (!(reg->idx[i].rel_addr = shader_instruction_array_clone_src_params(instructions, reg->idx[i].rel_addr, 1))) + return false; + } + + return true; +} + +static struct vkd3d_shader_dst_param *shader_instruction_array_clone_dst_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_dst_param *params, + unsigned int count) +{ + struct vkd3d_shader_dst_param *dst_params; + unsigned int i; + + if (!(dst_params = shader_dst_param_allocator_get(&instructions->dst_params, count))) + return NULL; + + memcpy(dst_params, params, count * sizeof(*params)); + for (i = 0; i < count; ++i) + { + if (!shader_register_clone_relative_addresses(&dst_params[i].reg, instructions)) + return NULL; + } + + return dst_params; +} + +static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, + unsigned int count) +{ + struct vkd3d_shader_src_param *src_params; + unsigned int i; + + if (!(src_params = shader_src_param_allocator_get(&instructions->src_params, count))) + return NULL; + + memcpy(src_params, params, count * sizeof(*params)); + for (i = 0; i < count; ++i) + { + if (!shader_register_clone_relative_addresses(&src_params[i].reg, instructions)) + return NULL; + } + + return src_params; +} + +/* NOTE: Immediate constant buffers are not cloned, so the source must not be destroyed while the + * destination is in use. 
This seems like a reasonable requirement given how this is currently used. */ +bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, + unsigned int dst, unsigned int src) +{ + struct vkd3d_shader_instruction *ins = &instructions->elements[dst]; + + *ins = instructions->elements[src]; + + if (ins->dst_count && ins->dst && !(ins->dst = shader_instruction_array_clone_dst_params(instructions, + ins->dst, ins->dst_count))) + return false; + + return !ins->src_count || !!(ins->src = shader_instruction_array_clone_src_params(instructions, + ins->src, ins->src_count)); +} + void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions) { unsigned int i; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 79be999adf9..d77c5393940 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -74,6 +74,13 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF = 1000, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE = 1001, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY = 1002, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT = 1003, + VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, + VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, + VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, + + VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301,
VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, @@ -125,6 +132,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -223,6 +231,7 @@ enum vkd3d_shader_opcode VKD3DSIH_DEQ, VKD3DSIH_DFMA, VKD3DSIH_DGE, + VKD3DSIH_DISCARD, VKD3DSIH_DIV, VKD3DSIH_DLT, VKD3DSIH_DMAX, @@ -675,6 +684,7 @@ struct vkd3d_shader_register bool non_uniform; enum vkd3d_data_type data_type; struct vkd3d_shader_register_index idx[3]; + unsigned int idx_count; enum vkd3d_immconst_type immconst_type; union { @@ -774,13 +784,36 @@ enum vkd3d_shader_input_sysval_semantic VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, };
+struct signature_element +{ + unsigned int sort_index; + const char *semantic_name; + unsigned int semantic_index; + unsigned int stream_index; + enum vkd3d_shader_sysval_semantic sysval_semantic; + enum vkd3d_shader_component_type component_type; + unsigned int register_index; + unsigned int register_count; + unsigned int mask; + unsigned int used_mask; + enum vkd3d_shader_minimum_precision min_precision; +}; + +struct shader_signature +{ + struct signature_element *elements; + unsigned int element_count; +}; + +void shader_signature_cleanup(struct shader_signature *signature); + struct vkd3d_shader_desc { const uint32_t *byte_code; size_t byte_code_size; - struct vkd3d_shader_signature input_signature; - struct vkd3d_shader_signature output_signature; - struct vkd3d_shader_signature patch_constant_signature; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; };
struct vkd3d_shader_register_semantic @@ -927,6 +960,11 @@ static inline bool vkd3d_shader_register_is_output(const struct vkd3d_shader_reg return reg->type == VKD3DSPR_OUTPUT || reg->type == VKD3DSPR_COLOROUT; }
+static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_PATCHCONST; +} + struct vkd3d_shader_location { const char *source_name; @@ -981,6 +1019,8 @@ bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instru bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *instructions, struct vkd3d_shader_immediate_constant_buffer *icb); +bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, + unsigned int dst, unsigned int src); void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions);
struct vkd3d_shader_parser @@ -991,10 +1031,8 @@ struct vkd3d_shader_parser
struct vkd3d_shader_desc shader_desc; struct vkd3d_shader_version shader_version; - const uint32_t *ptr; const struct vkd3d_shader_parser_ops *ops; struct vkd3d_shader_instruction_array instructions; - size_t instruction_idx; };
struct vkd3d_shader_parser_ops @@ -1028,7 +1066,8 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse parser->ops->parser_destroy(parser); }
-void vkd3d_shader_trace(struct vkd3d_shader_parser *parser); +void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version);
const char *shader_get_type_prefix(enum vkd3d_shader_type type);
@@ -1044,8 +1083,9 @@ struct vkd3d_string_buffer_cache size_t count, max_count, capacity; };
-enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out); +enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out); void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); @@ -1067,6 +1107,8 @@ struct vkd3d_bytecode_buffer int status; };
+/* Align to the next 4-byte offset, and return that offset. */ +size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer); size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value);
@@ -1128,8 +1170,10 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi
void free_shader_desc(struct vkd3d_shader_desc *desc);
+int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature); + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature);
struct vkd3d_glsl_generator;
@@ -1141,16 +1185,10 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator);
#define SPIRV_MAX_SRC_COUNT 6
-struct spirv_compiler; - -struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +int spirv_compile(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); -int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, - struct vkd3d_shader_code *spirv); -void spirv_compiler_destroy(struct spirv_compiler *compiler); + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]);
@@ -1202,6 +1240,14 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( } }
+enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, + unsigned int index); + +static inline enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) +{ + return vkd3d_siv_from_sysval_indexed(sysval, 0); +} + static inline unsigned int vkd3d_write_mask_get_component_idx(DWORD write_mask) { unsigned int i; @@ -1323,4 +1369,11 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code);
+enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); +enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( + struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); +enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, + enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, + struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); + #endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 6eddcfa2d14..32439eec7eb 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -1437,7 +1437,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( pool_desc.pNext = NULL; pool_desc.flags = 0; pool_desc.maxSets = 512; - pool_desc.poolSizeCount = ARRAY_SIZE(device->vk_pool_sizes); + pool_desc.poolSizeCount = device->vk_pool_count; pool_desc.pPoolSizes = device->vk_pool_sizes; if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) { @@ -2463,6 +2463,8 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets));
+ list->descriptor_heap_count = 0; + ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); }
@@ -2720,28 +2722,31 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des unsigned int index, bool use_array) { uint32_t descriptor_range_magic = range->descriptor_magic; - const struct vkd3d_view *view = descriptor->s.u.view_info.view; + union d3d12_desc_object u = descriptor->s.u; uint32_t vk_binding = range->binding; + VkDescriptorType vk_descriptor_type; uint32_t set = range->set;
- if (descriptor->s.magic != descriptor_range_magic) + if (!u.header || u.header->magic != descriptor_range_magic) return false;
+ vk_descriptor_type = u.header->vk_descriptor_type; + vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = NULL; vk_descriptor_write->dstSet = vk_descriptor_sets[set]; vk_descriptor_write->dstBinding = use_array ? vk_binding : vk_binding + index; vk_descriptor_write->dstArrayElement = use_array ? index : 0; vk_descriptor_write->descriptorCount = 1; - vk_descriptor_write->descriptorType = descriptor->s.vk_descriptor_type; + vk_descriptor_write->descriptorType = vk_descriptor_type; vk_descriptor_write->pImageInfo = NULL; vk_descriptor_write->pBufferInfo = NULL; vk_descriptor_write->pTexelBufferView = NULL;
- switch (descriptor->s.magic) + switch (u.header->magic) { case VKD3D_DESCRIPTOR_MAGIC_CBV: - vk_descriptor_write->pBufferInfo = &descriptor->s.u.vk_cbv_info; + vk_descriptor_write->pBufferInfo = &u.cb_desc->vk_cbv_info; break;
case VKD3D_DESCRIPTOR_MAGIC_SRV: @@ -2752,8 +2757,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des * in pairs in one set. */ if (range->descriptor_count == UINT_MAX) { - if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; vk_descriptor_write->dstBinding = 0; @@ -2763,21 +2768,21 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des { if (!use_array) vk_descriptor_write->dstBinding = vk_binding + 2 * index; - if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) ++vk_descriptor_write->dstBinding; }
- if (descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - || descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { - vk_descriptor_write->pTexelBufferView = &view->u.vk_buffer_view; + vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; } else { vk_image_info->sampler = VK_NULL_HANDLE; - vk_image_info->imageView = view->u.vk_image_view; - vk_image_info->imageLayout = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_SRV + vk_image_info->imageView = u.view->v.u.vk_image_view; + vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
vk_descriptor_write->pImageInfo = vk_image_info; @@ -2785,7 +2790,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break;
case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: - vk_image_info->sampler = view->u.vk_sampler; + vk_image_info->sampler = u.view->v.u.vk_sampler; vk_image_info->imageView = VK_NULL_HANDLE; vk_image_info->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -2793,7 +2798,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break;
default: - ERR("Invalid descriptor %#x.\n", descriptor->s.magic); + ERR("Invalid descriptor %#x.\n", u.header->magic); return false; }
@@ -2847,6 +2852,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list for (j = 0; j < descriptor_count; ++j, ++descriptor) { unsigned int register_idx = range->base_register_idx + j; + union d3d12_desc_object u = descriptor->s.u; + VkBufferView vk_counter_view; + + vk_counter_view = (u.header && u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV) + ? u.view->v.vk_counter_view : VK_NULL_HANDLE;
/* Track UAV counters. */ if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV) @@ -2856,8 +2866,6 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list if (state->uav_counters.bindings[k].register_space == range->register_space && state->uav_counters.bindings[k].register_index == register_idx) { - VkBufferView vk_counter_view = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV - ? descriptor->s.u.view_info.view->vk_counter_view : VK_NULL_HANDLE; if (bindings->vk_uav_counter_views[k] != vk_counter_view) bindings->uav_counters_dirty = true; bindings->vk_uav_counter_views[k] = vk_counter_view; @@ -2867,7 +2875,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list }
/* Not all descriptors are necessarily populated if the range is unbounded. */ - if (descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) + if (!u.header) continue;
if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write, current_image_info, @@ -3153,6 +3161,30 @@ static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_lis } }
+static bool contains_heap(struct d3d12_descriptor_heap **heap_array, unsigned int count, + const struct d3d12_descriptor_heap *query) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + if (heap_array[i] == query) + return true; + return false; +} + +static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) +{ + struct d3d12_device *device = list->device; + unsigned int i; + + for (i = 0; i < list->descriptor_heap_count; ++i) + { + vkd3d_mutex_lock(&list->descriptor_heaps[i]->vk_sets_mutex); + d3d12_desc_flush_vk_heap_updates_locked(list->descriptor_heaps[i], device); + vkd3d_mutex_unlock(&list->descriptor_heaps[i]->vk_sets_mutex); + } +} + static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) { @@ -3177,10 +3209,18 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l bindings->sampler_heap_id = heap->serial_id; }
- /* These sets can be shared across multiple command lists, and therefore binding must - * be synchronised. On an experimental branch in which caching of Vk descriptor writes - * greatly increased the chance of multiple threads arriving here at the same time, - * GRID 2019 crashed without the mutex lock. */ + if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) + { + if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) + { + /* Descriptors can be written after binding. */ + FIXME("Flushing descriptor updates while list %p is not closed.\n", list); + command_list_flush_vk_heap_updates(list); + list->descriptor_heap_count = 0; + } + list->descriptor_heaps[list->descriptor_heap_count++] = heap; + } + vkd3d_mutex_lock(&heap->vk_sets_mutex);
for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) @@ -3963,10 +4003,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo vk_viewports[i].minDepth = viewports[i].MinDepth; vk_viewports[i].maxDepth = viewports[i].MaxDepth;
- if (!vk_viewports[i].width || !vk_viewports[i].height) + if (vk_viewports[i].width <= 0.0f) { - FIXME_ONCE("Invalid viewport %u, ignoring RSSetViewports().\n", i); - return; + /* Vulkan does not support width <= 0 */ + FIXME_ONCE("Setting invalid viewport %u to zero height.\n", i); + vk_viewports[i].width = 1.0f; + vk_viewports[i].height = 0.0f; } }
@@ -4481,11 +4523,20 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, root_parameter = root_signature_get_root_descriptor(root_signature, index); assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV);
- resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); - buffer_info.buffer = resource->u.vk_buffer; - buffer_info.offset = gpu_address - resource->gpu_address; - buffer_info.range = resource->desc.Width - buffer_info.offset; - buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); + if (gpu_address) + { + resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); + buffer_info.buffer = resource->u.vk_buffer; + buffer_info.offset = gpu_address - resource->gpu_address; + buffer_info.range = resource->desc.Width - buffer_info.offset; + buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); + } + else + { + buffer_info.buffer = list->device->null_resources.vk_buffer; + buffer_info.offset = 0; + buffer_info.range = VK_WHOLE_SIZE; + }
if (vk_info->KHR_push_descriptor) { @@ -4547,13 +4598,13 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV);
/* FIXME: Re-use buffer views. */ - if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view)) + if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) { ERR("Failed to create buffer view.\n"); return; }
- if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) + if (vk_buffer_view && !(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) { ERR("Failed to add buffer view.\n"); VK_CALL(vkDestroyBufferView(vk_device, vk_buffer_view, NULL)); @@ -4644,6 +4695,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics WARN("Ignoring NULL index buffer view.\n"); return; } + if (!view->BufferLocation) + { + WARN("Ignoring index buffer location 0.\n"); + return; + }
vk_procs = &list->device->vk_procs;
@@ -4844,7 +4900,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi WARN("Failed to add view.\n"); }
- list->rtvs[i] = view->u.vk_image_view; + list->rtvs[i] = view->v.u.vk_image_view; list->fb_width = max(list->fb_width, rtv_desc->width); list->fb_height = max(list->fb_height, rtv_desc->height); list->fb_layer_count = max(list->fb_layer_count, rtv_desc->layer_count); @@ -4868,7 +4924,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi list->dsv = VK_NULL_HANDLE; }
- list->dsv = view->u.vk_image_view; + list->dsv = view->v.u.vk_image_view; list->fb_width = max(list->fb_width, dsv_desc->width); list->fb_height = max(list->fb_height, dsv_desc->height); list->fb_layer_count = max(list->fb_layer_count, dsv_desc->layer_count); @@ -4960,7 +5016,7 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, fb_desc.flags = 0; fb_desc.renderPass = vk_render_pass; fb_desc.attachmentCount = 1; - fb_desc.pAttachments = &view->u.vk_image_view; + fb_desc.pAttachments = &view->v.u.vk_image_view; fb_desc.width = width; fb_desc.height = height; fb_desc.layers = layer_count; @@ -5163,13 +5219,14 @@ static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clea }
static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_colour, + struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, unsigned int rect_count, const D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; unsigned int i, miplevel_idx, layer_count; struct vkd3d_uav_clear_pipeline pipeline; struct vkd3d_uav_clear_args clear_args; + const struct vkd3d_resource_view *view; VkDescriptorImageInfo image_info; D3D12_RECT full_rect, curr_rect; VkWriteDescriptorSet write_set; @@ -5181,8 +5238,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, d3d12_command_list_invalidate_bindings(list, list->state); d3d12_command_list_invalidate_root_parameters(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE);
- if (!d3d12_command_allocator_add_view(list->allocator, view)) + if (!d3d12_command_allocator_add_view(list->allocator, descriptor)) WARN("Failed to add view.\n"); + view = &descriptor->v;
clear_args.colour = *clear_colour;
@@ -5295,10 +5353,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; - struct vkd3d_view *view, *uint_view = NULL; struct vkd3d_texture_view_desc view_desc; const struct vkd3d_format *uint_format; + const struct vkd3d_resource_view *view; struct d3d12_resource *resource_impl; VkClearColorValue colour;
@@ -5306,7 +5365,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects);
resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; + if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + return; + view = &descriptor->v; memcpy(colour.uint32, values, sizeof(colour.uint32));
if (view->format->type != VKD3D_FORMAT_TYPE_UINT) @@ -5320,8 +5381,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID
if (d3d12_resource_is_buffer(resource_impl)) { - if (!vkd3d_create_buffer_view(device, resource_impl->u.vk_buffer, uint_format, - view->info.buffer.offset, view->info.buffer.size, &uint_view)) + if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, + uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) { ERR("Failed to create buffer view.\n"); return; @@ -5337,16 +5398,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID view_desc.layer_idx = view->info.texture.layer_idx; view_desc.layer_count = view->info.texture.layer_count;
- if (!vkd3d_create_texture_view(device, resource_impl->u.vk_image, &view_desc, &uint_view)) + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, + &uint_view)) { ERR("Failed to create image view.\n"); return; } } - view = uint_view; + descriptor = uint_view; }
- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); + d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects);
if (uint_view) vkd3d_view_decref(uint_view, device); @@ -5365,7 +5427,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects);
resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; + if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + return; memcpy(colour.float32, values, sizeof(colour.float32));
d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); @@ -5906,6 +5969,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d
list->update_descriptors = device->use_vk_heaps ? d3d12_command_list_update_heap_descriptors : d3d12_command_list_update_descriptors; + list->descriptor_heap_count = 0;
if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) { @@ -6199,6 +6263,8 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm return; }
+ command_list_flush_vk_heap_updates(cmd_list); + buffers[i] = cmd_list->vk_command_buffer; }
diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 39a5ca013c7..4263dcf4184 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -19,6 +19,8 @@ #include "vkd3d_private.h" #include "vkd3d_version.h"
+#define VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE 256u + struct vkd3d_struct { enum vkd3d_structure_type type; @@ -2393,9 +2395,23 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) WARN("Found no acceptable host time domain. Calibrated timestamps will not be available.\n"); }
-static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, - const struct vkd3d_device_descriptor_limits *limits) +static void device_init_descriptor_pool_sizes(struct d3d12_device *device) { + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; + VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; + + if (device->use_vk_heaps) + { + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, + VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; + device->vk_pool_count = 2; + return; + } + + assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6); pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); @@ -2412,8 +2428,27 @@ static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + device->vk_pool_count = 6; };
+static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) +{ + cache->head = NULL; + cache->size = size; +} + +static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) +{ + union d3d12_desc_object u; + void *next; + + for (u.object = cache->head; u.object; u.object = next) + { + next = u.header->next; + vkd3d_free(u.object); + } +} + /* ID3D12Device */ static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) { @@ -2454,7 +2489,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) { struct d3d12_device *device = impl_from_ID3D12Device(iface); ULONG refcount = InterlockedDecrement(&device->refcount); - size_t i;
TRACE("%p decreasing refcount to %u.\n", device, refcount);
@@ -2474,8 +2508,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_destroy(&device->desc_mutex[i]); + vkd3d_desc_object_cache_cleanup(&device->view_desc_cache); + vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -3368,132 +3402,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], - struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) -{ - enum vkd3d_vk_descriptor_set_index set; - for (set = 0; set < VKD3D_SET_INDEX_COUNT; ++set) - { - if (!infos[set].count) - continue; - d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); - infos[set].count = 0; - infos[set].uav_counter = false; - } -} - -static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], - struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) -{ - struct d3d12_desc_copy_location *location; - enum vkd3d_vk_descriptor_set_index set; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) - { - /* Source must be unlocked first, and therefore can't be used as a null source. 
*/ - static const struct d3d12_desc null = {0}; - vkd3d_mutex_unlock(mutex); - d3d12_desc_write_atomic(dst, &null, device); - return; - } - - set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->s.vk_descriptor_type); - location = &locations[set][infos[set].count++]; - - location->src.s = src->s; - - if (location->src.s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(location->src.s.u.view_info.view); - - vkd3d_mutex_unlock(mutex); - - infos[set].uav_counter |= (location->src.s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) - && !!location->src.s.u.view_info.view->vk_counter_view; - location->dst = dst; - - if (infos[set].count == ARRAY_SIZE(locations[0])) - { - d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); - infos[set].count = 0; - infos[set].uav_counter = false; - } -} - -/* Some games, e.g. Control, copy a large number of descriptors per frame, so the - * speed of this function is critical. */ -static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes) -{ - struct d3d12_desc_copy_location locations[VKD3D_SET_INDEX_COUNT][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - /* The locations array is relatively large, and often mostly empty. Keeping these - * values together in a separate array will likely result in fewer cache misses. 
*/ - struct d3d12_desc_copy_info infos[VKD3D_SET_INDEX_COUNT]; - struct d3d12_descriptor_heap *descriptor_heap = NULL; - const struct d3d12_desc *src, *heap_base, *heap_end; - unsigned int dst_range_size, src_range_size; - struct d3d12_desc *dst; - - descriptor_heap = d3d12_desc_get_descriptor_heap(d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[0])); - heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; - heap_end = heap_base + descriptor_heap->desc.NumDescriptors; - - memset(infos, 0, sizeof(infos)); - dst_range_idx = dst_idx = 0; - src_range_idx = src_idx = 0; - while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) - { - dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1; - src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; - - dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); - src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - - if (dst < heap_base || dst >= heap_end) - { - flush_desc_writes(locations, infos, descriptor_heap, device); - descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; - heap_end = heap_base + descriptor_heap->desc.NumDescriptors; - } - - for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++) - { - /* We don't need to lock either descriptor for the identity check. The descriptor - * mutex is only intended to prevent use-after-free of the vkd3d_view caused by a - * race condition in the calling app. It is unnecessary to protect this test as it's - * the app's race condition, not ours. 
*/ - if (dst[dst_idx].s.magic == src[src_idx].s.magic && (dst[dst_idx].s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && dst[dst_idx].s.u.view_info.written_serial_id == src[src_idx].s.u.view_info.view->serial_id) - continue; - d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); - } - - if (dst_idx >= dst_range_size) - { - ++dst_range_idx; - dst_idx = 0; - } - if (src_idx >= src_range_size) - { - ++src_range_idx; - src_idx = 0; - } - } - - flush_desc_writes(locations, infos, descriptor_heap, device); -} - -#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8 - static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, @@ -3525,15 +3433,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, if (!dst_descriptor_range_count) return;
- if (device->use_vk_heaps && (dst_descriptor_range_count > 1 || (dst_descriptor_range_sizes - && dst_descriptor_range_sizes[0] >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT))) - { - d3d12_device_vk_heaps_copy_descriptors(device, dst_descriptor_range_count, dst_descriptor_range_offsets, - dst_descriptor_range_sizes, src_descriptor_range_count, src_descriptor_range_offsets, - src_descriptor_range_sizes); - return; - } - dst_range_idx = dst_idx = 0; src_range_idx = src_idx = 0; while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) @@ -3544,8 +3443,12 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]);
- while (dst_idx < dst_range_size && src_idx < src_range_size) - d3d12_desc_copy(&dst[dst_idx++], &src[src_idx++], device); + for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) + { + if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) + continue; + d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); + }
if (dst_idx >= dst_range_size) { @@ -3570,17 +3473,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, descriptor_heap_type);
- if (descriptor_count >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT) - { - struct d3d12_device *device = impl_from_ID3D12Device(iface); - if (device->use_vk_heaps) - { - d3d12_device_vk_heaps_copy_descriptors(device, 1, &dst_descriptor_range_offset, - &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count); - return; - } - } - d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); } @@ -4080,7 +3972,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, { const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; - size_t i;
device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; @@ -4123,10 +4014,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, device->blocked_queue_count = 0; vkd3d_mutex_init(&device->blocked_queues_mutex);
- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_init(&device->desc_mutex[i]); + vkd3d_desc_object_cache_init(&device->view_desc_cache, sizeof(struct vkd3d_view)); + vkd3d_desc_object_cache_init(&device->cbuffer_desc_cache, sizeof(struct vkd3d_cbuffer_desc));
- vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits); + device_init_descriptor_pool_sizes(device);
if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent); diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index 8c050cfeb32..ea7b6859cc1 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -326,6 +326,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
vkd3d_private_store_destroy(&heap->private_store);
+ if (heap->map_ptr) + VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); + VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL));
vkd3d_mutex_destroy(&heap->mutex); @@ -346,12 +349,19 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_Release(ID3D12Heap *iface)
TRACE("%p decreasing refcount to %u.\n", heap, refcount);
- if (!refcount) + /* A heap must not be destroyed until all contained resources are destroyed. */ + if (!refcount && !heap->resource_count) d3d12_heap_destroy(heap);
return refcount; }
+static void d3d12_heap_resource_destroyed(struct d3d12_heap *heap) +{ + if (!InterlockedDecrement(&heap->resource_count) && (!heap->refcount || heap->is_private)) + d3d12_heap_destroy(heap); +} + static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(ID3D12Heap *iface, REFGUID guid, UINT *data_size, void *data) { @@ -437,97 +447,6 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) return impl_from_ID3D12Heap(iface); }
-static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, - struct d3d12_resource *resource, void **data) -{ - struct d3d12_device *device = heap->device; - HRESULT hr = S_OK; - VkResult vr; - - vkd3d_mutex_lock(&heap->mutex); - - assert(!resource->map_count || heap->map_ptr); - - if (!resource->map_count) - { - if (!heap->map_ptr) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - TRACE("Mapping heap %p.\n", heap); - - assert(!heap->map_count); - - if ((vr = VK_CALL(vkMapMemory(device->vk_device, heap->vk_memory, - 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) - { - WARN("Failed to map device memory, vr %d.\n", vr); - heap->map_ptr = NULL; - } - - hr = hresult_from_vk_result(vr); - } - - if (heap->map_ptr) - ++heap->map_count; - } - - if (hr == S_OK) - { - assert(heap->map_ptr); - if (data) - *data = (BYTE *)heap->map_ptr + offset; - ++resource->map_count; - } - else - { - assert(!heap->map_ptr); - if (data) - *data = NULL; - } - - vkd3d_mutex_unlock(&heap->mutex); - - return hr; -} - -static void d3d12_heap_unmap(struct d3d12_heap *heap, struct d3d12_resource *resource) -{ - struct d3d12_device *device = heap->device; - - vkd3d_mutex_lock(&heap->mutex); - - if (!resource->map_count) - { - WARN("Resource %p is not mapped.\n", resource); - goto done; - } - - --resource->map_count; - if (resource->map_count) - goto done; - - if (!heap->map_count) - { - ERR("Heap %p is not mapped.\n", heap); - goto done; - } - - --heap->map_count; - if (!heap->map_count) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - TRACE("Unmapping heap %p, ptr %p.\n", heap, heap->map_ptr); - - VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); - heap->map_ptr = NULL; - } - -done: - vkd3d_mutex_unlock(&heap->mutex); -} - static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) { if (!resource && !desc->SizeInBytes) @@ -552,15 +471,23 @@ static HRESULT validate_heap_desc(const 
D3D12_HEAP_DESC *desc, const struct d3d1 return S_OK; }
+static VkMemoryPropertyFlags d3d12_heap_get_memory_property_flags(const struct d3d12_heap *heap) +{ + return heap->device->memory_properties.memoryTypes[heap->vk_memory_type].propertyFlags; +} + static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkMemoryRequirements memory_requirements; VkDeviceSize vk_memory_size; + VkResult vr; HRESULT hr;
heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl; heap->refcount = 1; + heap->resource_count = 0;
heap->is_private = !!resource;
@@ -628,6 +555,20 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, heap->device = device; if (!heap->is_private) d3d12_device_add_ref(heap->device); + else + heap->resource_count = 1; + + if (d3d12_heap_get_memory_property_flags(heap) & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + if ((vr = VK_CALL(vkMapMemory(device->vk_device, + heap->vk_memory, 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) + { + heap->map_ptr = NULL; + ERR("Failed to map memory, vr %d.\n", vr); + d3d12_heap_destroy(heap); + return hresult_from_vk_result(vr); /* Propagate the vkMapMemory() failure code. */ + } + }
return S_OK; } @@ -1027,8 +968,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 else VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
- if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP) - d3d12_heap_destroy(resource->heap); + if (resource->heap) + d3d12_heap_resource_destroyed(resource->heap); }
static ULONG d3d12_resource_incref(struct d3d12_resource *resource) @@ -1223,12 +1164,55 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource *iface, return d3d12_device_query_interface(resource->device, iid, device); }
+static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource) +{ + assert(resource->heap->map_ptr); + return (uint8_t *)resource->heap->map_ptr + resource->heap_offset; +} + +static void d3d12_resource_get_vk_range(struct d3d12_resource *resource, + uint64_t offset, uint64_t size, VkMappedMemoryRange *vk_range) +{ + vk_range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + vk_range->pNext = NULL; + vk_range->memory = resource->heap->vk_memory; + vk_range->offset = resource->heap_offset + offset; + vk_range->size = size; +} + +static void d3d12_resource_invalidate(struct d3d12_resource *resource, uint64_t offset, uint64_t size) +{ + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + VkMappedMemoryRange vk_range; + VkResult vr; + + if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + return; + + d3d12_resource_get_vk_range(resource, offset, size, &vk_range); + if ((vr = VK_CALL(vkInvalidateMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0) + ERR("Failed to invalidate memory, vr %d.\n", vr); +} + +static void d3d12_resource_flush(struct d3d12_resource *resource, uint64_t offset, uint64_t size) +{ + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + VkMappedMemoryRange vk_range; + VkResult vr; + + if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + return; + + d3d12_resource_get_vk_range(resource, offset, size, &vk_range); + if ((vr = VK_CALL(vkFlushMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0) + ERR("Failed to flush memory, vr %d.\n", vr); +} + static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT sub_resource, const D3D12_RANGE *read_range, void **data) { struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); unsigned int sub_resource_count; - HRESULT hr;
TRACE("iface %p, sub_resource %u, read_range %p, data %p.\n", iface, sub_resource, read_range, data); @@ -1259,15 +1243,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT return E_NOTIMPL; }
- WARN("Ignoring read range %p.\n", read_range); - - if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, data))) - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - if (data) + { + *data = d3d12_resource_get_map_ptr(resource); TRACE("Returning pointer %p.\n", *data); + }
- return hr; + if (!read_range) + d3d12_resource_invalidate(resource, 0, resource->desc.Width); + else if (read_range->End > read_range->Begin) + d3d12_resource_invalidate(resource, read_range->Begin, read_range->End - read_range->Begin); + + return S_OK; }
static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT sub_resource, @@ -1286,9 +1273,10 @@ static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT s return; }
- WARN("Ignoring written range %p.\n", written_range); - - d3d12_heap_unmap(resource->heap, resource); + if (!written_range) + d3d12_resource_flush(resource, 0, resource->desc.Width); + else if (written_range->End > written_range->Begin) + d3d12_resource_flush(resource, written_range->Begin, written_range->End - written_range->Begin); }
static D3D12_RESOURCE_DESC * STDMETHODCALLTYPE d3d12_resource_GetDesc(ID3D12Resource *iface, @@ -1320,10 +1308,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc VkImageSubresource vk_sub_resource; const struct vkd3d_format *format; VkSubresourceLayout vk_layout; + uint64_t dst_offset, dst_size; struct d3d12_device *device; uint8_t *dst_data; D3D12_BOX box; - HRESULT hr;
TRACE("iface %p, src_data %p, src_row_pitch %u, src_slice_pitch %u, " "dst_sub_resource %u, dst_box %s.\n", @@ -1381,20 +1369,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch);
- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&dst_data))) - { - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - return hr; - } - - dst_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + dst_data = d3d12_resource_get_map_ptr(resource); + dst_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->left, dst_box->top, dst_box->front); + dst_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, dst_box->right, dst_box->bottom - 1, dst_box->back - 1) - dst_offset;
vkd3d_format_copy_data(format, src_data, src_row_pitch, src_slice_pitch, - dst_data, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, + dst_data + dst_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, dst_box->bottom - dst_box->top, dst_box->back - dst_box->front);
- d3d12_heap_unmap(resource->heap, resource); + d3d12_resource_flush(resource, dst_offset, dst_size);
return S_OK; } @@ -1408,10 +1393,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour VkImageSubresource vk_sub_resource; const struct vkd3d_format *format; VkSubresourceLayout vk_layout; + uint64_t src_offset, src_size; struct d3d12_device *device; uint8_t *src_data; D3D12_BOX box; - HRESULT hr;
TRACE("iface %p, dst_data %p, dst_row_pitch %u, dst_slice_pitch %u, " "src_sub_resource %u, src_box %s.\n", @@ -1469,21 +1454,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch);
- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&src_data))) - { - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - return hr; - } - - src_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + src_data = d3d12_resource_get_map_ptr(resource); + src_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, vk_layout.depthPitch, src_box->left, src_box->top, src_box->front); + src_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, src_box->right, src_box->bottom - 1, src_box->back - 1) - src_offset;
- vkd3d_format_copy_data(format, src_data, vk_layout.rowPitch, vk_layout.depthPitch, + d3d12_resource_invalidate(resource, src_offset, src_size); + + vkd3d_format_copy_data(format, src_data + src_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_data, dst_row_pitch, dst_slice_pitch, src_box->right - src_box->left, src_box->bottom - src_box->top, src_box->back - src_box->front);
- d3d12_heap_unmap(resource->heap, resource); - return S_OK; }
@@ -1941,6 +1923,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, { resource->heap = heap; resource->heap_offset = heap_offset; + InterlockedIncrement(&heap->resource_count); } else { @@ -2061,24 +2044,72 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); }
-/* CBVs, SRVs, UAVs */ -static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) +/* Objects are cached so that vkd3d_view_incref() can safely check the refcount + * of an object freed by another thread. */ +static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) { - struct vkd3d_view *view; + union d3d12_desc_object u; + void *next;
- if ((view = vkd3d_malloc(sizeof(*view)))) + do { - view->refcount = 1; - view->type = type; - view->serial_id = InterlockedIncrement64(&object_global_serial_id); - view->vk_counter_view = VK_NULL_HANDLE; + u.object = cache->head; + if (!u.object) + return vkd3d_malloc(cache->size); + next = u.header->next; } - return view; + while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next)); + + return u.object; +} + +static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) +{ + union d3d12_desc_object u = {object}; + void *head; + + do + { + head = cache->head; + u.header->next = head; + } + while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); +} + +static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) +{ + struct vkd3d_cbuffer_desc *desc; + + if (!(desc = vkd3d_desc_object_cache_get(&device->cbuffer_desc_cache))) + return NULL; + + desc->h.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; + desc->h.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + desc->h.refcount = 1; + + return desc; }
-void vkd3d_view_incref(struct vkd3d_view *view) +static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_descriptor_type, + enum vkd3d_view_type type, struct d3d12_device *device) { - InterlockedIncrement(&view->refcount); + struct vkd3d_view *view; + + assert(magic); + + if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache))) + { + ERR("Failed to allocate descriptor object.\n"); + return NULL; + } + + view->h.magic = magic; + view->h.vk_descriptor_type = vk_descriptor_type; + view->h.refcount = 1; + view->v.type = type; + view->v.vk_counter_view = VK_NULL_HANDLE; + + return view; }
static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) @@ -2087,313 +2118,299 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev
TRACE("Destroying view %p.\n", view);
- switch (view->type) + switch (view->v.type) { case VKD3D_VIEW_TYPE_BUFFER: - VK_CALL(vkDestroyBufferView(device->vk_device, view->u.vk_buffer_view, NULL)); + VK_CALL(vkDestroyBufferView(device->vk_device, view->v.u.vk_buffer_view, NULL)); break; case VKD3D_VIEW_TYPE_IMAGE: - VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); + VK_CALL(vkDestroyImageView(device->vk_device, view->v.u.vk_image_view, NULL)); break; case VKD3D_VIEW_TYPE_SAMPLER: - VK_CALL(vkDestroySampler(device->vk_device, view->u.vk_sampler, NULL)); + VK_CALL(vkDestroySampler(device->vk_device, view->v.u.vk_sampler, NULL)); break; default: - WARN("Unhandled view type %d.\n", view->type); + WARN("Unhandled view type %d.\n", view->v.type); }
- if (view->vk_counter_view) - VK_CALL(vkDestroyBufferView(device->vk_device, view->vk_counter_view, NULL)); + if (view->v.vk_counter_view) + VK_CALL(vkDestroyBufferView(device->vk_device, view->v.vk_counter_view, NULL));
- vkd3d_free(view); + vkd3d_desc_object_cache_push(&device->view_desc_cache, view); }
-void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) +void vkd3d_view_decref(void *view, struct d3d12_device *device) { - if (!InterlockedDecrement(&view->refcount)) - vkd3d_view_destroy(view, device); + union d3d12_desc_object u = {view}; + + if (vkd3d_atomic_decrement(&u.header->refcount)) + return; + + if (u.header->magic != VKD3D_DESCRIPTOR_MAGIC_CBV) + vkd3d_view_destroy(u.view, device); + else + vkd3d_desc_object_cache_push(&device->cbuffer_desc_cache, u.object); }
-/* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). */ -static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descriptor_heap_vk_set *descriptor_set, - struct d3d12_desc_copy_location *locations, unsigned int write_count) +static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) { - unsigned int i, info_index = 0, write_index = 0; + if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) + vkd3d_view_decref(view, device); +}
- switch (locations[0].src.s.vk_descriptor_type) - { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.s.u.vk_cbv_info; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.s.u.view_info.view->u.vk_image_view; - } - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.s.u.view_info.view->u.vk_buffer_view; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.s.u.view_info.view->u.vk_sampler; - } - break; - default: - ERR("Unhandled descriptor type %#x.\n", locations[0].src.s.vk_descriptor_type); - break; - } +#define 
VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 24 + +struct descriptor_writes +{ + VkDescriptorBufferInfo null_vk_cbv_info; + VkBufferView null_vk_buffer_view; + VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + void *held_refs[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + unsigned int count; + unsigned int held_ref_count; +}; + +static void descriptor_writes_free_object_refs(struct descriptor_writes *writes, struct d3d12_device *device) +{ + unsigned int i; + for (i = 0; i < writes->held_ref_count; ++i) + vkd3d_view_decref(writes->held_refs[i], device); + writes->held_ref_count = 0; }
static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_heap *descriptor_heap, - uint32_t dst_array_element, const struct d3d12_device *device) + uint32_t dst_array_element, struct descriptor_writes *writes, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct d3d12_descriptor_heap_vk_set *descriptor_set; - VkBufferView vk_buffer_view = VK_NULL_HANDLE; - enum vkd3d_vk_descriptor_set_index i; - VkDescriptorBufferInfo vk_cbv_info; - - vk_cbv_info.buffer = VK_NULL_HANDLE; - vk_cbv_info.offset = 0; - vk_cbv_info.range = VK_WHOLE_SIZE; + enum vkd3d_vk_descriptor_set_index set; + unsigned int i = writes->count;
/* Binding a shader with the wrong null descriptor type works in Windows. * To support that here we must write one to all applicable Vulkan sets. */ - for (i = VKD3D_SET_INDEX_UNIFORM_BUFFER; i <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++i) - { - descriptor_set = &descriptor_heap->vk_descriptor_sets[i]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst_array_element; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - switch (i) + for (set = VKD3D_SET_INDEX_UNIFORM_BUFFER; set <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++set) + { + descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].descriptorType = descriptor_set->vk_type; + switch (set) { case VKD3D_SET_INDEX_UNIFORM_BUFFER: - descriptor_set->vk_descriptor_writes[0].pBufferInfo = &vk_cbv_info; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = &writes->null_vk_cbv_info; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; break; case VKD3D_SET_INDEX_SAMPLED_IMAGE: case VKD3D_SET_INDEX_STORAGE_IMAGE: - descriptor_set->vk_image_infos[0].imageView = VK_NULL_HANDLE; + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageLayout = (set == VKD3D_SET_INDEX_STORAGE_IMAGE) + ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; case VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER: case VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &vk_buffer_view; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view; break; default: assert(false); break; } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); + if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1) + continue; + VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(writes, device); + i = 0; } + + writes->count = i; }
-/* dst and src contain the same data unless another thread overwrites dst. The array index is - * calculated from dst, and src is thread safe. */ -static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) +static void d3d12_desc_write_vk_heap(struct d3d12_descriptor_heap *descriptor_heap, unsigned int dst_array_element, + struct descriptor_writes *writes, void *object, struct d3d12_device *device) { struct d3d12_descriptor_heap_vk_set *descriptor_set; - struct d3d12_descriptor_heap *descriptor_heap; const struct vkd3d_vk_device_procs *vk_procs; + union d3d12_desc_object u = {object}; + unsigned int i = writes->count; + VkDescriptorType type; bool is_null = false;
- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( - src->s.vk_descriptor_type)]; + type = u.header->vk_descriptor_type; + descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(type)]; vk_procs = &device->vk_procs;
- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - switch (src->s.vk_descriptor_type) + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].descriptorType = type; + switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - descriptor_set->vk_descriptor_writes[0].pBufferInfo = &src->s.u.vk_cbv_info; - is_null = !src->s.u.vk_cbv_info.buffer; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = &u.cb_desc->vk_cbv_info; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + is_null = !u.cb_desc->vk_cbv_info.buffer; break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - is_null = !(descriptor_set->vk_image_infos[0].imageView = src->s.u.view_info.view->u.vk_image_view); + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; + is_null = !(writes->vk_image_infos[i].imageView = u.view->v.u.vk_image_view); + writes->vk_image_infos[i].imageLayout = (type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) + ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->u.vk_buffer_view; - is_null = !src->s.u.view_info.view->u.vk_buffer_view; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = &u.view->v.u.vk_buffer_view; + is_null = !u.view->v.u.vk_buffer_view; break; case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_image_infos[0].sampler = src->s.u.view_info.view->u.vk_sampler; + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = u.view->v.u.vk_sampler; + writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; break; default: - ERR("Unhandled descriptor type %#x.\n", src->s.vk_descriptor_type); + ERR("Unhandled descriptor type %#x.\n", type); break; } if (is_null && device->vk_info.EXT_robustness2) + return d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, dst_array_element, writes, device); + + ++i; + if (u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && u.view->v.vk_counter_view) { - d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, - descriptor_set->vk_descriptor_writes[0].dstArrayElement, device); - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); - return; + descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + 
writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i++].pTexelBufferView = &u.view->v.vk_counter_view; }
- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); - - if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->s.u.view_info.view->vk_counter_view) + if (i >= ARRAY_SIZE(writes->vk_descriptor_writes) - 1) { - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->vk_counter_view; - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); + VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(writes, device); + i = 0; }
- vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); + writes->count = i; }
-static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) +void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) { - struct vkd3d_view *defunct_view; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct d3d12_desc *descriptors, *src; + struct descriptor_writes writes; + union d3d12_desc_object u; + unsigned int i, next;
- if (!(dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->s.u.view_info.view->refcount)) - { - d3d12_desc_copy_raw(dst, src); - vkd3d_mutex_unlock(mutex); + if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) return; - }
- defunct_view = dst->s.u.view_info.view; - d3d12_desc_copy_raw(dst, src); - vkd3d_mutex_unlock(mutex); + writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; + writes.null_vk_cbv_info.offset = 0; + writes.null_vk_cbv_info.range = VK_WHOLE_SIZE; + writes.null_vk_buffer_view = VK_NULL_HANDLE; + writes.count = 0; + writes.held_ref_count = 0;
- /* Destroy the view after unlocking to reduce wait time. */ - vkd3d_view_destroy(defunct_view, device); -} - -void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) -{ - struct vkd3d_view *defunct_view = NULL; - struct vkd3d_mutex *mutex; + descriptors = (struct d3d12_desc *)descriptor_heap->descriptors;
- mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); + for (; i != UINT_MAX; i = next) + { + src = &descriptors[i]; + next = (int)src->next >> 1;
- /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ - if ((dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && !InterlockedDecrement(&dst->s.u.view_info.view->refcount)) - defunct_view = dst->s.u.view_info.view; + u.object = d3d12_desc_get_object_ref(src, device);
- d3d12_desc_copy_raw(dst, src); + if (!u.object) + { + vkd3d_atomic_exchange(&src->next, 0); + continue; + }
- vkd3d_mutex_unlock(mutex); + writes.held_refs[writes.held_ref_count++] = u.object; + d3d12_desc_write_vk_heap(descriptor_heap, i, &writes, u.object, device);
- /* Destroy the view after unlocking to reduce wait time. */ - if (defunct_view) - vkd3d_view_destroy(defunct_view, device); + vkd3d_atomic_exchange(&src->next, 0); + }
- if (device->use_vk_heaps && dst->s.magic) - d3d12_desc_write_vk_heap(dst, src, device); + /* Avoid thunk calls wherever possible. */ + if (writes.count) + VK_CALL(vkUpdateDescriptorSets(device->vk_device, writes.count, writes.vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(&writes, device); }
-static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) { - static const struct d3d12_desc null_desc = {0}; + struct d3d12_descriptor_heap *descriptor_heap; + unsigned int i, head; + + i = dst->index; + descriptor_heap = d3d12_desc_get_descriptor_heap(dst); + head = descriptor_heap->dirty_list_head;
- d3d12_desc_write_atomic(descriptor, &null_desc, device); + /* Only one thread can swap the value away from zero. */ + if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) + return; + /* Now it is safe to modify 'next' to another nonzero value if necessary. */ + while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) + { + head = descriptor_heap->dirty_list_head; + vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); + } }
-void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, - struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { - struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i, write_count; - - vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - - for (i = 0, write_count = 0; i < info->count; ++i) - { - d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); + void *object = src->s.u.object;
- if (i && locations[i].dst == locations[i - 1].dst + 1) - { - ++descriptor_set->vk_descriptor_writes[write_count - 1].descriptorCount; - continue; - } - /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. */ - descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - d3d12_descriptor_heap_write_vk_descriptor_range(descriptor_set, locations, write_count); - /* We could pass a VkCopyDescriptorSet array instead, but that would require also storing a src array index - * for each location, which means querying the src descriptor heap. Contiguous copies require contiguous src - * descriptors as well as dst, which is less likely to occur. And client race conditions may break it. */ - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); - - if (!info->uav_counter) - goto done; - - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - - for (i = 0, write_count = 0; i < info->count; ++i) - { - if (!locations[i].src.s.u.view_info.view->vk_counter_view) - continue; - descriptor_set->vk_buffer_views[write_count] = locations[i].src.s.u.view_info.view->vk_counter_view; - descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; - /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. 
*/ - descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); + d3d12_desc_replace(dst, object, device); + if (device->use_vk_heaps && object && !dst->next) + d3d12_desc_mark_as_modified(dst); +}
-done: - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); +static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +{ + d3d12_desc_replace(descriptor, NULL, device); }
void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { struct d3d12_desc tmp; - struct vkd3d_mutex *mutex;
assert(dst != src);
- /* Shadow of the Tomb Raider and possibly other titles sometimes destroy - * and rewrite a descriptor in another thread while it is being copied. */ - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(src->s.u.view_info.view); - - d3d12_desc_copy_raw(&tmp, src); - - vkd3d_mutex_unlock(mutex); - + tmp.s.u.object = d3d12_desc_get_object_ref(src, device); d3d12_desc_write_atomic(dst, &tmp, device); }
@@ -2455,8 +2472,9 @@ static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, return vr == VK_SUCCESS; }
-bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, - VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view) +bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, + const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, + struct vkd3d_view **view) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkBufferView vk_view = VK_NULL_HANDLE; @@ -2465,16 +2483,18 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c if (vk_buffer && !vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view)) return false;
- if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_BUFFER))) + if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV + ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VKD3D_VIEW_TYPE_BUFFER, device))) { VK_CALL(vkDestroyBufferView(device->vk_device, vk_view, NULL)); return false; }
- object->u.vk_buffer_view = vk_view; - object->format = format; - object->info.buffer.offset = offset; - object->info.buffer.size = size; + object->v.u.vk_buffer_view = vk_view; + object->v.format = format; + object->v.info.buffer.offset = offset; + object->v.info.buffer.size = size; *view = object; return true; } @@ -2482,7 +2502,7 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c #define VKD3D_VIEW_RAW_BUFFER 0x1
static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, - struct d3d12_resource *resource, DXGI_FORMAT view_format, + uint32_t magic, struct d3d12_resource *resource, DXGI_FORMAT view_format, unsigned int offset, unsigned int size, unsigned int structure_stride, unsigned int flags, struct vkd3d_view **view) { @@ -2513,7 +2533,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device,
assert(d3d12_resource_is_buffer(resource));
- return vkd3d_create_buffer_view(device, resource->u.vk_buffer, + return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer, format, offset * element_size, size * element_size, view); }
@@ -2741,7 +2761,7 @@ static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *de desc->layer_count = max_layer_count; }
-bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, +bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -2774,18 +2794,19 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, } }
- if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_IMAGE))) + if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VKD3D_VIEW_TYPE_IMAGE, device))) { VK_CALL(vkDestroyImageView(device->vk_device, vk_view, NULL)); return false; }
- object->u.vk_image_view = vk_view; - object->format = format; - object->info.texture.vk_view_type = desc->view_type; - object->info.texture.miplevel_idx = desc->miplevel_idx; - object->info.texture.layer_idx = desc->layer_idx; - object->info.texture.layer_count = desc->layer_count; + object->v.u.vk_image_view = vk_view; + object->v.format = format; + object->v.info.texture.vk_view_type = desc->view_type; + object->v.info.texture.miplevel_idx = desc->miplevel_idx; + object->v.info.texture.layer_idx = desc->layer_idx; + object->v.info.texture.layer_count = desc->layer_count; *view = object; return true; } @@ -2794,6 +2815,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) { struct VkDescriptorBufferInfo *buffer_info; + struct vkd3d_cbuffer_desc *cb_desc; struct d3d12_resource *resource;
if (!desc) @@ -2802,13 +2824,19 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, return; }
+ if (!(cb_desc = vkd3d_cbuffer_desc_create(device))) + { + ERR("Failed to allocate descriptor object.\n"); + return; + } + if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) { WARN("Size is not %u bytes aligned.\n", D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); return; }
- buffer_info = &descriptor->s.u.vk_cbv_info; + buffer_info = &cb_desc->vk_cbv_info; if (desc->BufferLocation) { resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); @@ -2824,8 +2852,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, buffer_info->range = VK_WHOLE_SIZE; }
- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor->s.u.cb_desc = cb_desc; }
static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) @@ -2842,7 +2869,6 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; VkImage vk_image;
if (!desc) @@ -2857,15 +2883,9 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer SRV %#x.\n", desc->Format);
- if (vkd3d_create_buffer_view(device, null_resources->vk_buffer, + vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, null_resources->vk_buffer, vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), - 0, VKD3D_NULL_BUFFER_SIZE, &view)) - { - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; - } + 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); return;
case D3D12_SRV_DIMENSION_TEXTURE2D: @@ -2904,20 +2924,13 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_ZERO; vkd3d_desc.allowed_swizzle = true;
- if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, vk_image, &vkd3d_desc, &descriptor->s.u.view); }
static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { - struct vkd3d_view *view; unsigned int flags;
if (!desc) @@ -2933,15 +2946,9 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, }
flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->u.Buffer.Flags); - if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource, desc->Format, desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, - desc->u.Buffer.StructureByteStride, flags, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + desc->u.Buffer.StructureByteStride, flags, &descriptor->s.u.view); }
static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, @@ -2970,7 +2977,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view;
if (!resource) { @@ -3002,6 +3008,11 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor,
switch (desc->ViewDimension) { + case D3D12_SRV_DIMENSION_TEXTURE1D: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_1D; + vkd3d_desc.miplevel_idx = desc->u.Texture1D.MostDetailedMip; + vkd3d_desc.miplevel_count = desc->u.Texture1D.MipLevels; + break; case D3D12_SRV_DIMENSION_TEXTURE2D: vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; vkd3d_desc.miplevel_idx = desc->u.Texture2D.MostDetailedMip; @@ -3066,13 +3077,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, } }
- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource->u.vk_image, &vkd3d_desc, + &descriptor->s.u.view); }
static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) @@ -3089,7 +3095,6 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; VkImage vk_image;
if (!desc) @@ -3104,15 +3109,9 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer UAV %#x.\n", desc->Format);
- if (vkd3d_create_buffer_view(device, null_resources->vk_storage_buffer, + vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, null_resources->vk_storage_buffer, vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), - 0, VKD3D_NULL_BUFFER_SIZE, &view)) - { - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; - } + 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); return;
case D3D12_UAV_DIMENSION_TEXTURE2D: @@ -3150,13 +3149,7 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_A; vkd3d_desc.allowed_swizzle = false;
- if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, vk_image, &vkd3d_desc, &descriptor->s.u.view); }
static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, @@ -3179,16 +3172,11 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ }
flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->u.Buffer.Flags); - if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + if (!vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource, desc->Format, desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, desc->u.Buffer.StructureByteStride, flags, &view)) return;
- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; - if (counter_resource) { const struct vkd3d_format *format; @@ -3198,13 +3186,16 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, - desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view)) + desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->v.vk_counter_view)) { WARN("Failed to create counter buffer view.\n"); - view->vk_counter_view = VK_NULL_HANDLE; - d3d12_desc_destroy(descriptor, device); + view->v.vk_counter_view = VK_NULL_HANDLE; + vkd3d_view_decref(view, device); + return; } } + + descriptor->s.u.view = view; }
static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, @@ -3212,7 +3203,6 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view;
if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) return; @@ -3227,6 +3217,9 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, { switch (desc->ViewDimension) { + case D3D12_UAV_DIMENSION_TEXTURE1D: + vkd3d_desc.miplevel_idx = desc->u.Texture1D.MipSlice; + break; case D3D12_UAV_DIMENSION_TEXTURE2D: vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice; if (desc->u.Texture2D.PlaneSlice) @@ -3257,13 +3250,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, } }
- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_image, &vkd3d_desc, + &descriptor->s.u.view); }
void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, @@ -3291,12 +3279,26 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d }
bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, - D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view) + D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view) { const struct vkd3d_format *format; struct d3d12_resource *resource;
format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + + if (!gpu_address) + { + if (device->vk_info.EXT_robustness2) + { + *vk_buffer_view = VK_NULL_HANDLE; + return true; + } + WARN("Creating null buffer view.\n"); + return vkd3d_create_vk_buffer_view(device, parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV + ? device->null_resources.vk_storage_buffer : device->null_resources.vk_buffer, + format, 0, VK_WHOLE_SIZE, vk_buffer_view); + } + resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); assert(d3d12_resource_is_buffer(resource)); return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, @@ -3412,21 +3414,21 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, FIXME("Ignoring border color {%.8e, %.8e, %.8e, %.8e}.\n", desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]);
- if (!(view = vkd3d_view_create(VKD3D_VIEW_TYPE_SAMPLER))) + if (!(view = vkd3d_view_create(VKD3D_DESCRIPTOR_MAGIC_SAMPLER, VK_DESCRIPTOR_TYPE_SAMPLER, + VKD3D_VIEW_TYPE_SAMPLER, device))) return; + view->v.u.vk_sampler = VK_NULL_HANDLE; + view->v.format = NULL;
if (d3d12_create_sampler(device, desc->Filter, desc->AddressU, desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy, - desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->u.vk_sampler) < 0) + desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->v.u.vk_sampler) < 0) { - vkd3d_free(view); + vkd3d_view_decref(view, device); return; }
- sampler->s.magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; - sampler->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; - sampler->s.u.view_info.view = view; - sampler->s.u.view_info.written_serial_id = view->serial_id; + sampler->s.u.view = view; }
HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, @@ -3448,7 +3450,7 @@ HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, /* RTVs */ static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_device *device) { - if (rtv->magic != VKD3D_DESCRIPTOR_MAGIC_RTV) + if (!rtv->view) return;
vkd3d_view_decref(rtv->view, device); @@ -3527,10 +3529,9 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev
assert(d3d12_resource_is_texture(resource));
- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view)) return;
- rtv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_RTV; rtv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); rtv_desc->format = vkd3d_desc.format; rtv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); @@ -3543,7 +3544,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev /* DSVs */ static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_device *device) { - if (dsv->magic != VKD3D_DESCRIPTOR_MAGIC_DSV) + if (!dsv->view) return;
vkd3d_view_decref(dsv->view, device); @@ -3612,10 +3613,9 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev
assert(d3d12_resource_is_texture(resource));
- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view)) return;
- dsv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_DSV; dsv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); dsv_desc->format = vkd3d_desc.format; dsv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); @@ -3883,7 +3883,6 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; VkDescriptorSetAllocateInfo set_desc; - unsigned int i; VkResult vr;
set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; @@ -3897,8 +3896,7 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript set_size.pDescriptorCounts = &variable_binding_size; if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) { - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) - descriptor_set->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; return S_OK; }
@@ -3914,7 +3912,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri
descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); - vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex);
if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) @@ -3925,53 +3922,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri
for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) { - struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) - { - descriptor_set->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptor_set->vk_descriptor_writes[i].pNext = NULL; - descriptor_set->vk_descriptor_writes[i].dstBinding = 0; - descriptor_set->vk_descriptor_writes[i].descriptorType = device->vk_descriptor_heap_layouts[set].type; - descriptor_set->vk_descriptor_writes[i].pImageInfo = NULL; - descriptor_set->vk_descriptor_writes[i].pBufferInfo = NULL; - descriptor_set->vk_descriptor_writes[i].pTexelBufferView = NULL; - } - switch (device->vk_descriptor_heap_layouts[set].type) - { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) - { - descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; - descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) - { - descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; - descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) - { - descriptor_set->vk_image_infos[i].imageView = VK_NULL_HANDLE; - 
descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - } - break; - default: - ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); - return E_FAIL; - } if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set))) return hr; @@ -3995,6 +3945,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript return hr;
d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex);
d3d12_device_add_ref(descriptor_heap->device = device);
@@ -4047,7 +3998,9 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, { memset(&dst[i].s, 0, sizeof(dst[i].s)); dst[i].index = i; + dst[i].next = 0; } + object->dirty_list_head = UINT_MAX; } else { diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index c964ea8fe3a..5e46b467252 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -1958,7 +1958,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device,
const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, }; @@ -2011,7 +2011,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER
const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, };
diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index 77b795d6278..e8d6371709c 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -44,13 +44,11 @@
#define VK_CALL(f) (vk_procs->f)
-#define VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW 0x01000000u - #define VKD3D_DESCRIPTOR_MAGIC_FREE 0x00000000u #define VKD3D_DESCRIPTOR_MAGIC_CBV VKD3D_MAKE_TAG('C', 'B', 'V', 0) -#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 1) -#define VKD3D_DESCRIPTOR_MAGIC_UAV VKD3D_MAKE_TAG('U', 'A', 'V', 1) -#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 1) +#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_UAV VKD3D_MAKE_TAG('U', 'A', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 0) #define VKD3D_DESCRIPTOR_MAGIC_DSV VKD3D_MAKE_TAG('D', 'S', 'V', 0) #define VKD3D_DESCRIPTOR_MAGIC_RTV VKD3D_MAKE_TAG('R', 'T', 'V', 0)
@@ -252,6 +250,31 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) { }
+static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) +{ + return InterlockedDecrement((LONG volatile *)x); +} + +static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) +{ + return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; +} + +static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +{ + return InterlockedExchange((LONG volatile *)x, val); +} + +static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) +{ + return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp; +} + +static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +{ + return InterlockedExchangePointer(x, val); +} + #else /* _WIN32 */
#include <pthread.h> @@ -354,6 +377,63 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) ERR("Could not destroy the condition variable, error %d.\n", ret); }
+# if HAVE_SYNC_SUB_AND_FETCH +static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) +{ + return __sync_sub_and_fetch(x, 1); +} +# else +# error "vkd3d_atomic_decrement() not implemented for this platform" +# endif /* HAVE_SYNC_SUB_AND_FETCH */ + +# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP +static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) +{ + return __sync_bool_compare_and_swap(x, cmp, xchg); +} + +static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) +{ + return __sync_bool_compare_and_swap(x, cmp, xchg); +} +# else +# error "vkd3d_atomic_compare_exchange() not implemented for this platform" +# endif + +# if HAVE_ATOMIC_EXCHANGE_N +static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +{ + return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +} + +static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +{ + return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +} +# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP +static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +{ + unsigned int i; + do + { + i = *x; + } while (!__sync_bool_compare_and_swap(x, i, val)); + return i; +} + +static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +{ + void *p; + do + { + p = *x; + } while (!__sync_bool_compare_and_swap(x, p, val)); + return p; +} +# else +# error "vkd3d_atomic_exchange() not implemented for this platform" +# endif + #endif /* _WIN32 */
HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, @@ -563,6 +643,7 @@ struct d3d12_heap { ID3D12Heap ID3D12Heap_iface; LONG refcount; + LONG resource_count;
bool is_private; D3D12_HEAP_DESC desc; @@ -661,11 +742,9 @@ enum vkd3d_view_type VKD3D_VIEW_TYPE_SAMPLER, };
-struct vkd3d_view +struct vkd3d_resource_view { - LONG refcount; enum vkd3d_view_type type; - uint64_t serial_id; union { VkBufferView vk_buffer_view; @@ -691,9 +770,6 @@ struct vkd3d_view } info; };
-void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device); -void vkd3d_view_incref(struct vkd3d_view *view); - struct vkd3d_texture_view_desc { VkImageViewType view_type; @@ -707,32 +783,88 @@ struct vkd3d_texture_view_desc bool allowed_swizzle; };
-bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, - VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); -bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, +struct vkd3d_desc_header +{ + uint32_t magic; + unsigned int volatile refcount; + void *next; + VkDescriptorType vk_descriptor_type; +}; + +struct vkd3d_view +{ + struct vkd3d_desc_header h; + struct vkd3d_resource_view v; +}; + +bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, + const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); +bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view);
-struct vkd3d_view_info +struct vkd3d_cbuffer_desc { - uint64_t written_serial_id; - struct vkd3d_view *view; + struct vkd3d_desc_header h; + VkDescriptorBufferInfo vk_cbv_info; };
struct d3d12_desc { struct { - uint32_t magic; - VkDescriptorType vk_descriptor_type; - union + union d3d12_desc_object { - VkDescriptorBufferInfo vk_cbv_info; - struct vkd3d_view_info view_info; + struct vkd3d_desc_header *header; + struct vkd3d_view *view; + struct vkd3d_cbuffer_desc *cb_desc; + void *object; } u; } s; unsigned int index; + unsigned int next; };
+void vkd3d_view_decref(void *view, struct d3d12_device *device); + +static inline bool vkd3d_view_incref(void *desc) +{ + struct vkd3d_desc_header *h = desc; + unsigned int refcount; + + do + { + refcount = h->refcount; + /* Avoid incrementing a freed object. Reading the value is safe because objects are recycled. */ + if (refcount <= 0) + return false; + } + while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1)); + + return true; +} + +static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc *src, struct d3d12_device *device) +{ + void *view; + + /* Some games, e.g. Shadow of the Tomb Raider, GRID 2019, and Horizon Zero Dawn, write descriptors + * from multiple threads without synchronisation. This is apparently valid in Windows. */ + for (;;) + { + do + { + view = src->s.u.object; + } while (view && !vkd3d_view_incref(view)); + + /* Check if the object is still in src to handle the case where it was + * already freed and reused elsewhere when the refcount was incremented. */ + if (view == src->s.u.object) + return view; + + vkd3d_view_decref(view, device); + } +} + static inline struct d3d12_desc *d3d12_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle) { return (struct d3d12_desc *)cpu_handle.ptr; @@ -761,13 +893,12 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device * void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device);
bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, - D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view); + D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view); HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, const D3D12_STATIC_SAMPLER_DESC *desc, VkSampler *vk_sampler);
struct d3d12_rtv_desc { - uint32_t magic; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; uint64_t width; @@ -787,7 +918,6 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev
struct d3d12_dsv_desc { - uint32_t magic; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; uint64_t width; @@ -837,15 +967,10 @@ struct vkd3d_vk_descriptor_heap_layout VkDescriptorSetLayout vk_set_layout; };
-#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 64 - struct d3d12_descriptor_heap_vk_set { VkDescriptorSet vk_set; - VkDescriptorBufferInfo vk_buffer_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - VkBufferView vk_buffer_views[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + VkDescriptorType vk_type; };
/* ID3D12DescriptorHeap */ @@ -865,9 +990,13 @@ struct d3d12_descriptor_heap struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT]; struct vkd3d_mutex vk_sets_mutex;
- BYTE descriptors[]; + unsigned int volatile dirty_list_head; + + uint8_t DECLSPEC_ALIGN(sizeof(void *)) descriptors[]; };
+void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device); + static inline struct d3d12_descriptor_heap *d3d12_desc_get_descriptor_heap(const struct d3d12_desc *descriptor) { return CONTAINING_RECORD(descriptor - descriptor->index, struct d3d12_descriptor_heap, descriptors); @@ -882,22 +1011,6 @@ static inline unsigned int d3d12_desc_heap_range_size(const struct d3d12_desc *d HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap);
-struct d3d12_desc_copy_location -{ - struct d3d12_desc src; - struct d3d12_desc *dst; -}; - -struct d3d12_desc_copy_info -{ - unsigned int count; - bool uav_counter; -}; - -void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, - struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, - struct d3d12_device *device); - /* ID3D12QueryHeap */ struct d3d12_query_heap { @@ -1295,6 +1408,8 @@ struct d3d12_command_list VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];
void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); + struct d3d12_descriptor_heap *descriptor_heaps[64]; + unsigned int descriptor_heap_count;
struct vkd3d_private_store private_store; }; @@ -1485,6 +1600,12 @@ struct vkd3d_uav_clear_state HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
+struct vkd3d_desc_object_cache +{ + void * volatile head; + size_t size; +}; + #define VKD3D_DESCRIPTOR_POOL_COUNT 6
/* ID3D12Device */ @@ -1502,7 +1623,8 @@ struct d3d12_device struct vkd3d_gpu_va_allocator gpu_va_allocator;
struct vkd3d_mutex mutex; - struct vkd3d_mutex desc_mutex[8]; + struct vkd3d_desc_object_cache view_desc_cache; + struct vkd3d_desc_object_cache cbuffer_desc_cache; struct vkd3d_render_pass_cache render_pass_cache; VkPipelineCache vk_pipeline_cache;
@@ -1544,6 +1666,7 @@ struct d3d12_device struct vkd3d_uav_clear_state uav_clear_state;
VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; + unsigned int vk_pool_count; struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; bool use_vk_heaps; }; @@ -1577,19 +1700,6 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); }
-static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, - const struct d3d12_desc *descriptor) -{ - STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1))); - uintptr_t idx = (uintptr_t)descriptor; - - idx ^= idx >> 12; - idx ^= idx >> 6; - idx ^= idx >> 3; - - return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; -} - /* utils */ enum vkd3d_format_type {
From: Alexandre Julliard julliard@winehq.org
To test the upcoming 1.9 release. --- dlls/d3dcompiler_43/tests/hlsl_d3d11.c | 4 +- dlls/d3dcompiler_43/tests/hlsl_d3d9.c | 3 + include/d3d12.idl | 70 +- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 22 + .../include/private/vkd3d_shader_utils.h | 63 + libs/vkd3d/include/vkd3d.h | 35 + libs/vkd3d/include/vkd3d_shader.h | 284 +- libs/vkd3d/libs/vkd3d-common/debug.c | 17 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 48 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 415 ++- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 17 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 2370 +++++++++++++++++ libs/vkd3d/libs/vkd3d-shader/hlsl.c | 211 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 46 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1633 +++++++----- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 643 +++-- .../libs/vkd3d-shader/hlsl_constant_ops.c | 525 +++- libs/vkd3d/libs/vkd3d-shader/ir.c | 230 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 140 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 438 +-- libs/vkd3d/libs/vkd3d-shader/tpf.c | 1360 ++++++---- .../libs/vkd3d-shader/vkd3d_shader_main.c | 432 ++- .../libs/vkd3d-shader/vkd3d_shader_private.h | 118 +- libs/vkd3d/libs/vkd3d/command.c | 676 +++-- libs/vkd3d/libs/vkd3d/device.c | 245 +- libs/vkd3d/libs/vkd3d/resource.c | 342 ++- libs/vkd3d/libs/vkd3d/state.c | 14 +- libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 124 +- 30 files changed, 8298 insertions(+), 2232 deletions(-) create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h create mode 100644 libs/vkd3d/libs/vkd3d-shader/dxil.c
diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d11.c b/dlls/d3dcompiler_43/tests/hlsl_d3d11.c index c853b14f13a..d0c3c223353 100644 --- a/dlls/d3dcompiler_43/tests/hlsl_d3d11.c +++ b/dlls/d3dcompiler_43/tests/hlsl_d3d11.c @@ -616,8 +616,7 @@ static void test_sampling(void) winetest_push_context("Test %u", i);
ID3D11DeviceContext_ClearRenderTargetView(test_context.immediate_context, test_context.rtv, red); - todo_wine_if (i < 3) - ps_code = compile_shader_flags(tests[i], "ps_4_0", D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY); + ps_code = compile_shader_flags(tests[i], "ps_4_0", D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY); if (ps_code) { draw_quad(&test_context, ps_code); @@ -902,7 +901,6 @@ static void test_reflection(void) refcount = reflection->lpVtbl->Release(reflection); ok(!refcount, "Got unexpected refcount %lu.\n", refcount);
- todo_wine code = compile_shader_flags(ps_source, "ps_4_0", D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY); if (!code) return; diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c index 7f84c2c62e3..2631f463e96 100644 --- a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c +++ b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c @@ -559,6 +559,8 @@ static void test_conditionals(void) device = test_context.device;
ps_code = compile_shader(ps_if_source, "ps_2_0", 0); + if (ps_code) + { draw_quad(device, ps_code); init_readback(device, &rb);
@@ -578,6 +580,7 @@ static void test_conditionals(void)
release_readback(&rb); ID3D10Blob_Release(ps_code); + }
ps_code = compile_shader(ps_ternary_source, "ps_2_0", 0); if (ps_code) diff --git a/include/d3d12.idl b/include/d3d12.idl index 4fec32d2656..5811608b94f 100644 --- a/include/d3d12.idl +++ b/include/d3d12.idl @@ -44,6 +44,7 @@ const UINT D3D12_DEFAULT_STENCIL_WRITE_MASK = 0xff; const UINT D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND = 0xffffffff; cpp_quote("#define D3D12_FLOAT32_MAX (3.402823466e+38f)") const UINT D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 32; +const UINT D3D12_PACKED_TILE = 0xffffffff; const UINT D3D12_UAV_SLOT_COUNT = 64; const UINT D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; const UINT D3D12_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; @@ -72,6 +73,7 @@ const UINT D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT = 4096; const UINT D3D12_STANDARD_MAXIMUM_ELEMENT_ALIGNMENT_BYTE_MULTIPLE = 4; const UINT D3D12_TEXTURE_DATA_PITCH_ALIGNMENT = 256; const UINT D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT = 512; +const UINT D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES = 65536; const UINT D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT = 4096; const UINT D3D12_VS_INPUT_REGISTER_COUNT = 32; const UINT D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE = 16; @@ -1853,6 +1855,24 @@ typedef struct D3D12_WRITEBUFFERIMMEDIATE_PARAMETER UINT32 Value; } D3D12_WRITEBUFFERIMMEDIATE_PARAMETER;
+typedef enum D3D12_PROTECTED_RESOURCE_SESSION_FLAGS +{ + D3D12_PROTECTED_RESOURCE_SESSION_FLAG_NONE = 0, +} D3D12_PROTECTED_RESOURCE_SESSION_FLAGS; +cpp_quote("DEFINE_ENUM_FLAG_OPERATORS(D3D12_PROTECTED_RESOURCE_SESSION_FLAGS);") + +typedef enum D3D12_PROTECTED_SESSION_STATUS +{ + D3D12_PROTECTED_SESSION_STATUS_OK = 0, + D3D12_PROTECTED_SESSION_STATUS_INVALID = 1, +} D3D12_PROTECTED_SESSION_STATUS; + +typedef struct D3D12_PROTECTED_RESOURCE_SESSION_DESC +{ + UINT NodeMask; + D3D12_PROTECTED_RESOURCE_SESSION_FLAGS Flags; +} D3D12_PROTECTED_RESOURCE_SESSION_DESC; + [ uuid(c4fec28f-7966-4e95-9f94-f431cb56c3b8), object, @@ -2214,6 +2234,41 @@ interface ID3D12GraphicsCommandList2 : ID3D12GraphicsCommandList1 const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes); }
+[ + uuid(a1533d18-0ac1-4084-85b9-89a96116806b), + object, + local, + pointer_default(unique) +] +interface ID3D12ProtectedSession : ID3D12DeviceChild +{ + HRESULT GetStatusFence(REFIID riid, void **fence); + + D3D12_PROTECTED_SESSION_STATUS GetSessionStatus(); +} + +[ + uuid(6cd696f4-f289-40cc-8091-5a6c0a099c3d), + object, + local, + pointer_default(unique) +] +interface ID3D12ProtectedResourceSession : ID3D12ProtectedSession +{ + D3D12_PROTECTED_RESOURCE_SESSION_DESC GetDesc(); +} + +[ + uuid(6fda83a7-b84c-4e38-9ac8-c7bd22016b3d), + object, + local, + pointer_default(unique) +] +interface ID3D12GraphicsCommandList3 : ID3D12GraphicsCommandList2 +{ + void SetProtectedResourceSession(ID3D12ProtectedResourceSession *protected_resource_session); +} + typedef enum D3D12_TILE_RANGE_FLAGS { D3D12_TILE_RANGE_FLAG_NONE = 0x0, @@ -2243,8 +2298,8 @@ interface ID3D12CommandQueue : ID3D12Pageable ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, - UINT *heap_range_offsets, - UINT *range_tile_counts, + const UINT *heap_range_offsets, + const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags);
void CopyTileMappings(ID3D12Resource *dst_resource, @@ -2378,6 +2433,17 @@ interface ID3D12Fence : ID3D12Pageable HRESULT Signal(UINT64 value); }
+[ + uuid(433685fe-e22b-4ca0-a8db-b5b4f4dd0e4a), + object, + local, + pointer_default(unique) +] +interface ID3D12Fence1 : ID3D12Fence +{ + D3D12_FENCE_FLAGS GetCreationFlags(); +} + [ uuid(6102dee4-af59-4b09-b999-b44d73f09b24), object, diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 1ba0e9f71e1..f647af11d07 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -17,6 +17,7 @@ SOURCES = \ libs/vkd3d-shader/d3d_asm.c \ libs/vkd3d-shader/d3dbc.c \ libs/vkd3d-shader/dxbc.c \ + libs/vkd3d-shader/dxil.c \ libs/vkd3d-shader/glsl.c \ libs/vkd3d-shader/hlsl.c \ libs/vkd3d-shader/hlsl.l \ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 08dde1b2e7f..ee733ee0d76 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -20,6 +20,7 @@ #define __VKD3D_COMMON_H
#include "config.h" +#define WIN32_LEAN_AND_MEAN #include "windows.h" #include "vkd3d_types.h"
@@ -28,6 +29,7 @@ #include <stdbool.h> #include <stdint.h> #include <stdio.h> +#include <stdlib.h>
#ifdef _MSC_VER #include <intrin.h> @@ -171,6 +173,11 @@ static inline bool vkd3d_bound_range(size_t start, size_t count, size_t limit) #endif }
+static inline bool vkd3d_object_range_overflow(size_t start, size_t count, size_t size) +{ + return (~(size_t)0 - start) / size < count; +} + static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) { return low | ((uint16_t)high << 8); @@ -186,6 +193,21 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) return (x > y) - (x < y); }
+static inline bool bitmap_clear(uint32_t *map, unsigned int idx) +{ + return map[idx >> 5] &= ~(1u << (idx & 0x1f)); +} + +static inline bool bitmap_set(uint32_t *map, unsigned int idx) +{ + return map[idx >> 5] |= (1u << (idx & 0x1f)); +} + +static inline bool bitmap_is_set(const uint32_t *map, unsigned int idx) +{ + return map[idx >> 5] & (1u << (idx & 0x1f)); +} + static inline int ascii_isupper(int c) { return 'A' <= c && c <= 'Z'; diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h new file mode 100644 index 00000000000..00052a89988 --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h @@ -0,0 +1,63 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADER_UTILS_H +#define __VKD3D_SHADER_UTILS_H + +#include "vkd3d_shader.h" + +#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') +#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') +#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') + +static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, + enum vkd3d_shader_source_type *type, char **messages) +{ + struct vkd3d_shader_dxbc_desc desc; + enum vkd3d_result ret; + unsigned int i; + + *type = VKD3D_SHADER_SOURCE_NONE; + + if ((ret = vkd3d_shader_parse_dxbc(dxbc, 0, &desc, messages)) < 0) + return ret; + + for (i = 0; i < desc.section_count; ++i) + { + uint32_t tag = desc.sections[i].tag; + if (tag == TAG_SHDR || tag == TAG_SHEX) + { + *type = VKD3D_SHADER_SOURCE_DXBC_TPF; + } + else if (tag == TAG_DXIL) + { + *type = VKD3D_SHADER_SOURCE_DXBC_DXIL; + /* Default to DXIL if both are present. */ + break; + } + } + + vkd3d_shader_free_dxbc(&desc); + + if (*type == VKD3D_SHADER_SOURCE_NONE) + return VKD3D_ERROR_INVALID_SHADER; + + return VKD3D_OK; +} + +#endif /* __VKD3D_SHADER_UTILS_H */ diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index 72ed3ced671..2ccda47248a 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h @@ -207,7 +207,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device);
VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); + +/** + * Acquire the Vulkan queue backing a command queue. + * + * While a queue is acquired by the client, it is locked so that + * neither the vkd3d library nor other threads can submit work to + * it. For that reason it should be released as soon as possible with + * vkd3d_release_vk_queue(). The lock is not reentrant, so the same + * queue must not be acquired more than once by the same thread. + * + * Work submitted through the Direct3D 12 API exposed by vkd3d is not + * always immediately submitted to the Vulkan queue; sometimes it is + * kept in another internal queue, which might not necessarily be + * empty at the time vkd3d_acquire_vk_queue() is called. For this + * reason, work submitted directly to the Vulkan queue might appear to + * the Vulkan driver as being submitted before other work submitted + * through the Direct3D 12 API. If this is not desired, it is + * recommended to synchronize work submission using an ID3D12Fence + * object, by submitting to the queue a signal operation after all the + * Direct3D 12 work is submitted and waiting for it before calling + * vkd3d_acquire_vk_queue(). + * + * \since 1.0 + */ VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); + +/** + * Release the Vulkan queue backing a command queue. + * + * This must be paired with an earlier corresponding + * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan + * queue returned by vkd3d_acquire_vk_queue() must not be used any + * more. + * + * \since 1.0 + */ VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue);
VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 274241546ea..cfe54dbff53 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -85,6 +85,16 @@ enum vkd3d_shader_structure_type * \since 1.3 */ VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, + /** + * The structure is a vkd3d_shader_scan_signature_info structure. + * \since 1.9 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, + /** + * The structure is a vkd3d_shader_next_stage_info structure. + * \since 1.9 + */ + VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -134,6 +144,14 @@ enum vkd3d_shader_compile_option_formatting_flags VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), };
+enum vkd3d_shader_compile_option_pack_matrix_order +{ + VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR = 0x00000001, + VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR = 0x00000002, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER), +}; + enum vkd3d_shader_compile_option_name { /** @@ -164,6 +182,15 @@ enum vkd3d_shader_compile_option_name * \since 1.7 */ VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE = 0x00000006, + /** + * This option specifies default matrix packing order. It's only supported for HLSL source type. + * Explicit variable modifiers or pragmas will take precedence. + * + * \a value is a member of enum vkd3d_shader_compile_option_pack_matrix_order. + * + * \since 1.9 + */ + VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER = 0x00000007,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), }; @@ -327,6 +354,25 @@ struct vkd3d_shader_parameter } u; };
+/** + * Symbolic register indices for mapping uniform constant register sets in + * legacy Direct3D bytecode to constant buffer views in the target environment. + * + * Members of this enumeration are used in + * \ref vkd3d_shader_resource_binding.register_index. + * + * \since 1.9 + */ +enum vkd3d_shader_d3dbc_constant_register +{ + /** The float constant register set, c# in Direct3D assembly. */ + VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER = 0x0, + /** The integer constant register set, i# in Direct3D assembly. */ + VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER = 0x1, + /** The boolean constant register set, b# in Direct3D assembly. */ + VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER = 0x2, +}; + /** * Describes the mapping of a single resource or resource array to its binding * point in the target environment. @@ -351,7 +397,14 @@ struct vkd3d_shader_resource_binding * support multiple register spaces, this parameter must be set to 0. */ unsigned int register_space; - /** Register index of the DXBC resource. */ + /** + * Register index of the Direct3D resource. + * + * For legacy Direct3D shaders, vkd3d-shader maps each constant register + * set to a single constant buffer view. This parameter names the register + * set to map, and must be a member of + * enum vkd3d_shader_d3dbc_constant_register. + */ unsigned int register_index; /** Shader stage(s) to which the resource is visible. */ enum vkd3d_shader_visibility shader_visibility; @@ -611,6 +664,11 @@ enum vkd3d_shader_source_type * model 1, 2, and 3 shaders. \since 1.3 */ VKD3D_SHADER_SOURCE_D3D_BYTECODE, + /** + * A 'DirectX Intermediate Language' shader embedded in a DXBC container. This is + * the format used for Direct3D shader model 6 shaders. \since 1.9 + */ + VKD3D_SHADER_SOURCE_DXBC_DXIL,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), }; @@ -620,7 +678,7 @@ enum vkd3d_shader_target_type { /** * The shader has no type or is to be ignored. This is not a valid value - * for vkd3d_shader_compile() or vkd3d_shader_scan(). + * for vkd3d_shader_compile(). */ VKD3D_SHADER_TARGET_NONE, /** @@ -1281,6 +1339,8 @@ enum vkd3d_shader_descriptor_info_flag /** The descriptor is a UAV resource, on which the shader performs * atomic ops. \since 1.6 */ VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS = 0x00000008, + /** The descriptor is a raw (byte-addressed) buffer. \since 1.9 */ + VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER = 0x00000010,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_INFO_FLAG), }; @@ -1320,6 +1380,20 @@ struct vkd3d_shader_descriptor_info * A chained structure enumerating the descriptors declared by a shader. * * This structure extends vkd3d_shader_compile_info. + * + * When scanning a legacy Direct3D shader, vkd3d-shader enumerates each + * constant register set used by the shader as a single constant buffer + * descriptor, as follows: + * - The \ref vkd3d_shader_descriptor_info.type field is set to + * VKD3D_SHADER_DESCRIPTOR_TYPE_CBV. + * - The \ref vkd3d_shader_descriptor_info.register_space field is set to zero. + * - The \ref vkd3d_shader_descriptor_info.register_index field is set to a + * member of enum vkd3d_shader_d3dbc_constant_register denoting which set + * is used. + * - The \ref vkd3d_shader_descriptor_info.count field is set to one. + * + * In summary, there may be up to three such descriptors, one for each register + * set used by the shader: float, integer, and boolean. */ struct vkd3d_shader_scan_descriptor_info { @@ -1551,6 +1625,134 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); }
+/** + * A chained structure containing descriptions of shader inputs and outputs. + * + * This structure is currently implemented only for DXBC and legacy D3D bytecode + * source types. + * For DXBC shaders, the returned information is parsed directly from the + * signatures embedded in the DXBC shader. + * For legacy D3D shaders, the returned information is synthesized based on + * registers declared or used by shader instructions. + * For all other shader types, the structure is zeroed. + * + * All members (except for \ref type and \ref next) are output-only. + * + * This structure is passed to vkd3d_shader_scan() and extends + * vkd3d_shader_compile_info. + * + * Members of this structure are allocated by vkd3d-shader and should be freed + * with vkd3d_shader_free_scan_signature_info() when no longer needed. + * + * All signatures may contain pointers into the input shader, and should only + * be accessed while the input shader remains valid. + * + * Signature elements are synthesized from legacy Direct3D bytecode as follows: + * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an + * uppercase string corresponding to the HLSL name for the usage, e.g. + * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. + * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the + * usage index. + * - The \ref vkd3d_shader_signature_element.stream_index is always 0. + * + * Signature elements are synthesized for any input or output register declared + * or used in a legacy Direct3D bytecode shader, including the following: + * - Shader model 1 and 2 colour and texture coordinate registers. + * - The shader model 1 pixel shader output register. + * - Shader model 1 and 2 vertex shader output registers (position, fog, and + * point size). + * - Shader model 3 pixel shader system value input registers (pixel position + * and face). 
+ * + * \since 1.9 + */ +struct vkd3d_shader_scan_signature_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** The shader input varyings. */ + struct vkd3d_shader_signature input; + + /** The shader output varyings. */ + struct vkd3d_shader_signature output; + + /** The shader patch constant varyings. */ + struct vkd3d_shader_signature patch_constant; +}; + +/** + * Describes the mapping of an output varying register in a shader stage, + * to an input varying register in the following shader stage. + * + * This structure is used in struct vkd3d_shader_next_stage_info. + */ +struct vkd3d_shader_varying_map +{ + /** + * The signature index (in the output signature) of the output varying. + * If greater than or equal to the number of elements in the output + * signature, signifies that the varying is consumed by the next stage but + * not written by this one. + */ + unsigned int output_signature_index; + /** The register index of the input varying to map this register to. */ + unsigned int input_register_index; + /** The mask consumed by the destination register. */ + unsigned int input_mask; +}; + +/** + * A chained structure which describes the next shader in the pipeline. + * + * This structure is optional, and should only be provided if there is in fact + * another shader in the pipeline. + * However, depending on the input and output formats, this structure may be + * necessary in order to generate shaders which correctly match each other. + * If the structure or its individual fields are not provided, vkd3d-shader + * will generate shaders which may be correct in isolation, but are not + * guaranteed to correctly match each other. + * + * This structure is passed to vkd3d_shader_compile() and extends + * vkd3d_shader_compile_info. + * + * This structure contains only input parameters. 
+ * + * \since 1.9 + */ +struct vkd3d_shader_next_stage_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** + * A mapping of output varyings in this shader stage to input varyings + * in the next shader stage. + * + * This mapping should include exactly one element for each varying + * consumed by the next shader stage. + * If this shader stage outputs a varying that is not consumed by the next + * shader stage, that varying should be absent from this array. + * + * If this field is absent, vkd3d-shader will map varyings from one stage + * to another based on their register index. + * For Direct3D shader model 3.0, such a default mapping will be incorrect + * unless the registers are allocated in the same order, and hence this + * field is necessary to correctly match inter-stage varyings. + * This mapping may also be necessary under other circumstances where the + * varying interface does not match exactly. + * + * This mapping may be constructed by vkd3d_shader_build_varying_map(). + */ + const struct vkd3d_shader_varying_map *varying_map; + /** The number of registers provided in \ref varying_map. 
*/ + unsigned int varying_count; +}; + #ifdef LIBVKD3D_SHADER_SOURCE # define VKD3D_SHADER_API VKD3D_EXPORT #else @@ -1623,12 +1825,14 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * * Depending on the source and target types, this function may support the * following chained structures: + * - vkd3d_shader_hlsl_source_info * - vkd3d_shader_interface_info + * - vkd3d_shader_next_stage_info * - vkd3d_shader_scan_descriptor_info + * - vkd3d_shader_scan_signature_info * - vkd3d_shader_spirv_domain_shader_target_info * - vkd3d_shader_spirv_target_info * - vkd3d_shader_transform_feedback_info - * - vkd3d_shader_hlsl_source_info * * \param compile_info A chained structure containing compilation parameters. * @@ -1784,6 +1988,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * Parse shader source code or byte code, returning various types of requested * information. * + * The \a source_type member of \a compile_info must be set to the type of the + * shader. + * + * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which + * case vkd3d_shader_scan() will return information about the shader in + * isolation. Alternatively, it may be set to a valid compilation target for the + * shader, in which case vkd3d_shader_scan() will return information that + * reflects the interface for a shader as it will be compiled to that target. + * In this case other chained structures may be appended to \a compile_info as + * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, + * such as vkd3d_shader_spirv_target_info. + * + * (For a hypothetical example, suppose the source shader distinguishes float + * and integer texture data, but the target environment does not support integer + * textures. In this case vkd3d_shader_compile() might translate integer + * operations to float. 
Accordingly using VKD3D_SHADER_TARGET_NONE would + * accurately report whether the texture expects integer or float data, but + * using the relevant specific target type would report + * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) + * * Currently this function supports the following code types: * - VKD3D_SHADER_SOURCE_DXBC_TPF * @@ -1791,6 +2015,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * \n * The DXBC_TPF scanner supports the following chained structures: * - vkd3d_shader_scan_descriptor_info + * - vkd3d_shader_scan_signature_info * \n * Although the \a compile_info parameter is read-only, chained structures * passed to this function need not be, and may serve as output parameters, @@ -1827,12 +2052,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info);
/** - * Read the input signature of a compiled shader, returning a structural + * Read the input signature of a compiled DXBC shader, returning a structural * description which can be easily parsed by C code. * * This function parses a compiled shader. To parse a standalone root signature, * use vkd3d_shader_parse_root_signature(). * + * This function only parses DXBC shaders, and only retrieves the input + * signature. To retrieve signatures from other shader types, or other signature + * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. + * This function returns the same input signature that is returned in + * struct vkd3d_shader_scan_signature_info. + * * \param dxbc Compiled byte code, in DXBC format. * * \param signature Output location in which the parsed root signature will be @@ -2022,6 +2253,48 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages);
+/** + * Free members of struct vkd3d_shader_scan_signature_info allocated by + * vkd3d_shader_scan(). + * + * This function may free members of vkd3d_shader_scan_signature_info, but + * does not free the structure itself. + * + * \param info Scan information to free. + * + * \since 1.9 + */ +VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); + +/** + * Build a mapping of output varyings in a shader stage to input varyings in + * the following shader stage. + * + * This mapping should be used in struct vkd3d_shader_next_stage_info to + * compile the first shader. + * + * \param output_signature The output signature of the first shader. + * + * \param input_signature The input signature of the second shader. + * + * \param count On output, contains the number of entries written into + * \ref varyings. + * + * \param varyings Pointer to an output array of varyings. + * This must point to space for N varyings, where N is the number of elements + * in the input signature. + * + * \remark Valid legacy Direct3D pixel shaders have at most 12 varying inputs: + * 10 inter-stage varyings, face, and position. + * Therefore, in practice, it is safe to call this function with a + * pre-allocated array with a fixed size of 12. + * + * \since 1.9 + */ +VKD3D_SHADER_API void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, + const struct vkd3d_shader_signature *input_signature, + unsigned int *count, struct vkd3d_shader_varying_map *varyings); + #endif /* VKD3D_SHADER_NO_PROTOTYPES */
/** Type of vkd3d_shader_get_version(). */ @@ -2087,6 +2360,9 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages);
+/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ +typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index b363efbd360..aa7df5bd764 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -31,6 +31,7 @@ #include <stdlib.h> #include <stdbool.h> #include <string.h> +#include <unistd.h> #ifdef HAVE_PTHREAD_H #include <pthread.h> #endif @@ -44,11 +45,11 @@ extern const char *const vkd3d_dbg_env_name;
static const char *const debug_level_names[] = { - /* VKD3D_DBG_LEVEL_NONE */ "none", - /* VKD3D_DBG_LEVEL_ERR */ "err", - /* VKD3D_DBG_LEVEL_FIXME */ "fixme", - /* VKD3D_DBG_LEVEL_WARN */ "warn", - /* VKD3D_DBG_LEVEL_TRACE */ "trace", + [VKD3D_DBG_LEVEL_NONE ] = "none", + [VKD3D_DBG_LEVEL_ERR ] = "err", + [VKD3D_DBG_LEVEL_FIXME] = "fixme", + [VKD3D_DBG_LEVEL_WARN ] = "warn", + [VKD3D_DBG_LEVEL_TRACE] = "trace", };
enum vkd3d_dbg_level vkd3d_dbg_get_level(void) @@ -105,7 +106,13 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch
assert(level < ARRAY_SIZE(debug_level_names));
+#ifdef _WIN32 + vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); +#elif HAVE_GETTID + vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); +#else vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); +#endif va_start(args, fmt); vkd3d_dbg_voutput(fmt, args); va_end(args); diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 0a821b5c878..d72402eb250 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -578,17 +578,17 @@ static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, e { static const char *const resource_type_names[] = { - /* VKD3D_SHADER_RESOURCE_NONE */ "none", - /* VKD3D_SHADER_RESOURCE_BUFFER */ "buffer", - /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ "texture1d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ "texture2d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ "texture2dms", - /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ "texture3d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ "texturecube", - /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ "texture1darray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ "texture2darray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray", + [VKD3D_SHADER_RESOURCE_NONE ] = "none", + [VKD3D_SHADER_RESOURCE_BUFFER ] = "buffer", + [VKD3D_SHADER_RESOURCE_TEXTURE_1D ] = "texture1d", + [VKD3D_SHADER_RESOURCE_TEXTURE_2D ] = "texture2d", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ] = "texture2dms", + [VKD3D_SHADER_RESOURCE_TEXTURE_3D ] = "texture3d", + [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE ] = "texturecube", + [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ] = "texture1darray", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY ] = "texture2darray", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = "texture2dmsarray", + [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = "texturecubearray", };
if (type < ARRAY_SIZE(resource_type_names)) @@ -601,19 +601,19 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const { static const char *const data_type_names[] = { - /* VKD3D_DATA_FLOAT */ "float", - /* VKD3D_DATA_INT */ "int", - /* VKD3D_DATA_RESOURCE */ "resource", - /* VKD3D_DATA_SAMPLER */ "sampler", - /* VKD3D_DATA_UAV */ "uav", - /* VKD3D_DATA_UINT */ "uint", - /* VKD3D_DATA_UNORM */ "unorm", - /* VKD3D_DATA_SNORM */ "snorm", - /* VKD3D_DATA_OPAQUE */ "opaque", - /* VKD3D_DATA_MIXED */ "mixed", - /* VKD3D_DATA_DOUBLE */ "double", - /* VKD3D_DATA_CONTINUED */ "<continued>", - /* VKD3D_DATA_UNUSED */ "<unused>", + [VKD3D_DATA_FLOAT ] = "float", + [VKD3D_DATA_INT ] = "int", + [VKD3D_DATA_RESOURCE ] = "resource", + [VKD3D_DATA_SAMPLER ] = "sampler", + [VKD3D_DATA_UAV ] = "uav", + [VKD3D_DATA_UINT ] = "uint", + [VKD3D_DATA_UNORM ] = "unorm", + [VKD3D_DATA_SNORM ] = "snorm", + [VKD3D_DATA_OPAQUE ] = "opaque", + [VKD3D_DATA_MIXED ] = "mixed", + [VKD3D_DATA_DOUBLE ] = "double", + [VKD3D_DATA_CONTINUED] = "<continued>", + [VKD3D_DATA_UNUSED ] = "<unused>", }; const char *name; int i; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 712613ac13b..99a5bd7a438 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -214,6 +214,9 @@ struct vkd3d_shader_sm1_parser bool abort;
struct vkd3d_shader_parser p; + +#define MAX_CONSTANT_COUNT 8192 + uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; };
/* This table is not order or position dependent. */ @@ -260,9 +263,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = /* Declarations */ {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, /* Constant definitions */ - {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, /* Control flow */ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, @@ -327,9 +330,9 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = /* Declarations */ {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, /* Constant definitions */ - {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, /* Control flow */ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, @@ -490,6 +493,309 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; }
+static struct signature_element *find_signature_element(const struct shader_signature *signature, + const char *semantic_name, unsigned int semantic_index) +{ + struct signature_element *e = signature->elements; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) + && e[i].semantic_index == semantic_index) + return &e[i]; + } + + return NULL; +} + +static struct signature_element *find_signature_element_by_register_index( + const struct shader_signature *signature, unsigned int register_index) +{ + struct signature_element *e = signature->elements; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + if (e[i].register_index == register_index) + return &e[i]; + } + + return NULL; +} + +#define SM1_COLOR_REGISTER_OFFSET 8 + +static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, + const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, + unsigned int register_index, bool is_dcl, unsigned int mask) +{ + struct shader_signature *signature; + struct signature_element *element; + + if (output) + signature = &sm1->p.shader_desc.output_signature; + else + signature = &sm1->p.shader_desc.input_signature; + + if ((element = find_signature_element(signature, name, index))) + { + element->mask |= mask; + if (!is_dcl) + element->used_mask |= mask; + return true; + } + + if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, + signature->element_count + 1, sizeof(*signature->elements))) + return false; + element = &signature->elements[signature->element_count++]; + + element->semantic_name = name; + element->semantic_index = index; + element->stream_index = 0; + element->sysval_semantic = sysval; + element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + element->register_index = register_index; + element->target_location = register_index; + element->register_count = 1; + element->mask = mask; 
+ element->used_mask = is_dcl ? 0 : mask; + element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; + + return true; +} + +static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, + unsigned int register_index, unsigned int mask) +{ + struct shader_signature *signature; + struct signature_element *element; + + if (output) + signature = &sm1->p.shader_desc.output_signature; + else + signature = &sm1->p.shader_desc.input_signature; + + if (!(element = find_signature_element_by_register_index(signature, register_index))) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, + "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); + return; + } + + element->used_mask |= mask; +} + +static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) +{ + unsigned int register_index = reg->idx[0].offset; + + switch (reg->type) + { + case VKD3DSPR_TEMP: + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL + && sm1->p.shader_version.major == 1 && !register_index) + return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask); + return true; + + case VKD3DSPR_INPUT: + /* For vertex shaders or sm3 pixel shaders, we should have already + * had a DCL instruction. Otherwise, this is a colour input. */ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) + { + add_signature_mask(sm1, false, register_index, mask); + return true; + } + return add_signature_element(sm1, false, "COLOR", register_index, + VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); + + case VKD3DSPR_TEXTURE: + /* For vertex shaders, this is ADDR. 
*/ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + return true; + return add_signature_element(sm1, false, "TEXCOORD", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + + case VKD3DSPR_OUTPUT: + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + { + /* For sm < 2 vertex shaders, this is TEXCRDOUT. + * + * For sm3 vertex shaders, this is OUTPUT, but we already + * should have had a DCL instruction. */ + if (sm1->p.shader_version.major == 3) + { + add_signature_mask(sm1, true, register_index, mask); + return true; + } + return add_signature_element(sm1, true, "TEXCOORD", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + } + /* fall through */ + + case VKD3DSPR_ATTROUT: + return add_signature_element(sm1, true, "COLOR", register_index, + VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); + + case VKD3DSPR_COLOROUT: + return add_signature_element(sm1, true, "COLOR", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + + case VKD3DSPR_DEPTHOUT: + return add_signature_element(sm1, true, "DEPTH", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + case VKD3DSPR_RASTOUT: + switch (register_index) + { + case 0: + return add_signature_element(sm1, true, "POSITION", 0, + VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); + + case 1: + return add_signature_element(sm1, true, "FOG", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + case 2: + return add_signature_element(sm1, true, "PSIZE", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + default: + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, + "Invalid rasterizer output index %u.", register_index); + return true; + } + + case VKD3DSPR_MISCTYPE: + switch (register_index) + { + case 0: + return add_signature_element(sm1, false, "VPOS", 0, + VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); + + case 1: + return 
add_signature_element(sm1, false, "VFACE", 0, + VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); + + default: + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, + "Invalid miscellaneous fragment input index %u.", register_index); + return true; + } + + default: + return true; + } +} + +static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_semantic *semantic) +{ + const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + unsigned int mask = semantic->resource.reg.write_mask; + bool output; + + static const char sm1_semantic_names[][13] = + { + [VKD3D_DECL_USAGE_POSITION ] = "POSITION", + [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", + [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", + [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", + [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", + [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", + [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", + [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", + [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", + [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", + [VKD3D_DECL_USAGE_COLOR ] = "COLOR", + [VKD3D_DECL_USAGE_FOG ] = "FOG", + [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", + [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", + }; + + if (reg->type == VKD3DSPR_OUTPUT) + output = true; + else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) + output = false; + else /* vpos and vface don't have a semantic. */ + return add_signature_element_from_register(sm1, reg, true, mask); + + /* sm2 pixel shaders use DCL but don't provide a semantic. */ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) + return add_signature_element_from_register(sm1, reg, true, mask); + + /* With the exception of vertex POSITION output, none of these are system + * values. 
Pixel POSITION input is not equivalent to SV_Position; the closer + * equivalent is VPOS, which is not declared as a semantic. */ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX + && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) + sysval = VKD3D_SHADER_SV_POSITION; + + return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], + semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); +} + +static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, + enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) +{ + struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; + + desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); + if (from_def) + { + /* d3d shaders have a maximum of 8192 constants; we should not overrun + * this array. */ + assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); + bitmap_set(sm1->constant_def_mask[set], index); + } +} + +static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) +{ + struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; + uint32_t register_index = reg->idx[0].offset; + + switch (reg->type) + { + case VKD3DSPR_TEMP: + desc->temp_count = max(desc->temp_count, register_index + 1); + break; + + case VKD3DSPR_CONST: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); + break; + + case VKD3DSPR_CONST2: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); + break; + + case VKD3DSPR_CONST3: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); + break; + + case VKD3DSPR_CONST4: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); + break; + + case VKD3DSPR_CONSTINT: + 
record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); + break; + + case VKD3DSPR_CONSTBOOL: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); + break; + + default: + break; + } + + add_signature_element_from_register(sm1, reg, false, mask); +} + /* Read a parameter token from the input stream, and possibly a relative * addressing token. */ static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, @@ -640,6 +946,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, range = &semantic->resource.range; range->space = 0; range->first = range->last = semantic->resource.reg.reg.idx[0].offset; + + add_signature_element_from_semantic(sm1, semantic); }
static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, @@ -744,6 +1052,14 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, } }
+static unsigned int mask_from_swizzle(unsigned int swizzle) +{ + return (1u << vkd3d_swizzle_get_component(swizzle, 0)) + | (1u << vkd3d_swizzle_get_component(swizzle, 1)) + | (1u << vkd3d_swizzle_get_component(swizzle, 2)) + | (1u << vkd3d_swizzle_get_component(swizzle, 3)); +} + static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { struct vkd3d_shader_src_param *src_params, *predicate; @@ -817,22 +1133,28 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } else if (ins->handler_idx == VKD3DSIH_DEFB) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } else if (ins->handler_idx == VKD3DSIH_DEFI) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } else { /* Destination token */ if (ins->dst_count) + { shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, false); + }
/* Predication token */ if (ins->predicate) @@ -840,7 +1162,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str
/* Other source tokens */ for (i = 0; i < ins->src_count; ++i) + { shader_sm1_read_src_param(sm1, &p, &src_params[i]); + shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle), false); + } }
if (sm1->abort) @@ -947,12 +1272,30 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, return VKD3D_OK; }
+static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, + enum vkd3d_shader_d3dbc_constant_register set) +{ + unsigned int j; + + /* Find the highest constant index which is not written by a DEF + * instruction. We can't (easily) use an FFZ function for this since it + * needs to be limited by the highest used register index. */ + for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) + { + if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) + return j; + } + + return 0; +} + int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) { struct vkd3d_shader_instruction_array *instructions; struct vkd3d_shader_instruction *ins; struct vkd3d_shader_sm1_parser *sm1; + unsigned int i; int ret;
if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) @@ -992,6 +1335,9 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi
*parser = &sm1->p;
+ for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) + sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); + return sm1->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; }
@@ -1340,7 +1686,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe else { put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); - put_u32(buffer, var->regs[r].bind_count); + put_u32(buffer, var->bind_count[r]); } put_u32(buffer, 0); /* type */ put_u32(buffer, 0); /* FIXME: default value */ @@ -1553,12 +1899,13 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_
for (i = 0; i < ctx->constant_defs.count; ++i) { + const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; uint32_t token = D3DSIO_DEF; const struct sm1_dst_register reg = { .type = D3DSPR_CONST, .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = i, + .reg = constant_reg->index, };
if (ctx->profile->major_version > 1) @@ -1567,7 +1914,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_
write_sm1_dst_register(buffer, ®); for (x = 0; x < 4; ++x) - put_f32(buffer, ctx->constant_defs.values[i].f[x]); + put_f32(buffer, constant_reg->value.f[x]); } }
@@ -1686,14 +2033,19 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) continue;
- count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; + count = var->bind_count[HLSL_REGSET_SAMPLERS];
for (i = 0; i < count; ++i) { if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) { sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; - assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); + if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { + /* These can appear in sm4-style combined sample instructions. */ + hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); + continue; + }
reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); @@ -1844,6 +2196,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } }
+/* Emit the sm1 instruction for a jump node. Only HLSL_IR_JUMP_DISCARD_NEG is
+ * handled, as a TEXKILL whose destination is the register holding the jump
+ * condition; every other jump type is reported through hlsl_fixme(). */
+static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
+{
+    const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
+
+    switch (jump->type)
+    {
+        case HLSL_IR_JUMP_DISCARD_NEG:
+        {
+            struct hlsl_reg *reg = &jump->condition.node->reg;
+
+            /* Named so that it does not shadow the "instr" parameter. */
+            struct sm1_instruction sm1_instr =
+            {
+                .opcode = VKD3D_SM1_OP_TEXKILL,
+
+                .dst.type = D3DSPR_TEMP,
+                .dst.reg = reg->id,
+                .dst.writemask = reg->writemask,
+                .has_dst = 1,
+            };
+
+            write_sm1_instruction(ctx, buffer, &sm1_instr);
+            break;
+        }
+
+        default:
+            /* No trailing newline, matching the other hlsl_fixme() calls. */
+            hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
+            break;
+    }
+}
+
 static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
 {
     const struct hlsl_ir_load *load = hlsl_ir_load(instr);
@@ -2038,6 +2419,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
             write_sm1_expr(ctx, buffer, instr);
             break;
+ case HLSL_IR_JUMP: + write_sm1_jump(ctx, buffer, instr); + break; + case HLSL_IR_LOAD: write_sm1_load(ctx, buffer, instr); break; @@ -2063,7 +2448,6 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) { struct vkd3d_bytecode_buffer buffer = {0}; - int ret;
put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
@@ -2076,10 +2460,17 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
put_u32(&buffer, D3DSIO_END);
- if (!(ret = buffer.status)) + if (buffer.status) + ctx->result = buffer.status; + + if (!ctx->result) { out->code = buffer.data; out->size = buffer.size; } - return ret; + else + { + vkd3d_free(buffer.data); + } + return ctx->result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 3e3f06faeb5..cedc3da4a83 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -391,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s read_dword(&ptr, &e[i].sysval_semantic); read_dword(&ptr, &e[i].component_type); read_dword(&ptr, &e[i].register_index); + e[i].target_location = e[i].register_index; e[i].register_count = 1; read_dword(&ptr, &mask); e[i].mask = mask & 0xff; @@ -493,8 +494,14 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, return ret; break;
+ case TAG_DXIL: case TAG_SHDR: case TAG_SHEX: + if ((section->tag == TAG_DXIL) != desc->is_dxil) + { + TRACE("Skipping chunk %#x.\n", section->tag); + break; + } if (desc->byte_code) FIXME("Multiple shader code chunks.\n"); desc->byte_code = section->data.code; @@ -505,10 +512,6 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, TRACE("Skipping AON9 shader code chunk.\n"); break;
- case TAG_DXIL: - FIXME("Skipping DXIL shader model 6+ code chunk.\n"); - break; - default: TRACE("Skipping chunk %#x.\n", section->tag); break; @@ -529,12 +532,6 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, { int ret;
- desc->byte_code = NULL; - desc->byte_code_size = 0; - memset(&desc->input_signature, 0, sizeof(desc->input_signature)); - memset(&desc->output_signature, 0, sizeof(desc->output_signature)); - memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); - ret = for_each_dxbc_section(dxbc, message_context, source_name, shdr_handler, desc); if (!desc->byte_code) ret = VKD3D_ERROR_INVALID_ARGUMENT; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c new file mode 100644 index 00000000000..f9efe47f95d --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -0,0 +1,2370 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) + +enum bitcode_block_id +{ + BLOCKINFO_BLOCK = 0, + MODULE_BLOCK = 8, + PARAMATTR_BLOCK = 9, + PARAMATTR_GROUP_BLOCK = 10, + CONSTANTS_BLOCK = 11, + FUNCTION_BLOCK = 12, + VALUE_SYMTAB_BLOCK = 14, + METADATA_BLOCK = 15, + METADATA_ATTACHMENT_BLOCK = 16, + TYPE_BLOCK = 17, + USELIST_BLOCK = 18, +}; + +enum bitcode_blockinfo_code +{ + SETBID = 1, + BLOCKNAME = 2, + SETRECORDNAME = 3, +}; + +enum bitcode_block_abbreviation +{ + END_BLOCK = 0, + ENTER_SUBBLOCK = 1, + DEFINE_ABBREV = 2, + UNABBREV_RECORD = 3, +}; + +enum bitcode_abbrev_type +{ + ABBREV_FIXED = 1, + ABBREV_VBR = 2, + ABBREV_ARRAY = 3, + ABBREV_CHAR = 4, + ABBREV_BLOB = 5, +}; + +enum bitcode_address_space +{ + ADDRESS_SPACE_DEFAULT, + ADDRESS_SPACE_DEVICEMEM, + ADDRESS_SPACE_CBUFFER, + ADDRESS_SPACE_GROUPSHARED, +}; + +enum bitcode_module_code +{ + MODULE_CODE_VERSION = 1, + MODULE_CODE_GLOBALVAR = 7, + MODULE_CODE_FUNCTION = 8, +}; + +enum bitcode_constant_code +{ + CST_CODE_SETTYPE = 1, + CST_CODE_NULL = 2, + CST_CODE_UNDEF = 3, + CST_CODE_INTEGER = 4, + CST_CODE_FLOAT = 6, + CST_CODE_STRING = 8, + CST_CODE_CE_GEP = 12, + CST_CODE_CE_INBOUNDS_GEP = 20, + CST_CODE_DATA = 22, +}; + +enum bitcode_function_code +{ + FUNC_CODE_DECLAREBLOCKS = 1, + FUNC_CODE_INST_BINOP = 2, + FUNC_CODE_INST_CAST = 3, + FUNC_CODE_INST_RET = 10, + FUNC_CODE_INST_BR = 11, + FUNC_CODE_INST_SWITCH = 12, + FUNC_CODE_INST_PHI = 16, + FUNC_CODE_INST_ALLOCA = 19, + FUNC_CODE_INST_LOAD = 20, + FUNC_CODE_INST_EXTRACTVAL = 26, + FUNC_CODE_INST_CMP2 = 28, + FUNC_CODE_INST_VSELECT = 
29, + FUNC_CODE_INST_CALL = 34, + FUNC_CODE_INST_ATOMICRMW = 38, + FUNC_CODE_INST_LOADATOMIC = 41, + FUNC_CODE_INST_GEP = 43, + FUNC_CODE_INST_STORE = 44, + FUNC_CODE_INST_STOREATOMIC = 45, + FUNC_CODE_INST_CMPXCHG = 46, +}; + +enum bitcode_type_code +{ + TYPE_CODE_NUMENTRY = 1, + TYPE_CODE_VOID = 2, + TYPE_CODE_FLOAT = 3, + TYPE_CODE_DOUBLE = 4, + TYPE_CODE_LABEL = 5, + TYPE_CODE_INTEGER = 7, + TYPE_CODE_POINTER = 8, + TYPE_CODE_HALF = 10, + TYPE_CODE_ARRAY = 11, + TYPE_CODE_VECTOR = 12, + TYPE_CODE_METADATA = 16, + TYPE_CODE_STRUCT_ANON = 18, + TYPE_CODE_STRUCT_NAME = 19, + TYPE_CODE_STRUCT_NAMED = 20, + TYPE_CODE_FUNCTION = 21, +}; + +enum bitcode_value_symtab_code +{ + VST_CODE_ENTRY = 1, + VST_CODE_BBENTRY = 2, +}; + +struct sm6_pointer_info +{ + const struct sm6_type *type; + enum bitcode_address_space addr_space; +}; + +struct sm6_struct_info +{ + const char *name; + unsigned int elem_count; + const struct sm6_type *elem_types[]; +}; + +struct sm6_function_info +{ + const struct sm6_type *ret_type; + unsigned int param_count; + const struct sm6_type *param_types[]; +}; + +struct sm6_array_info +{ + unsigned int count; + const struct sm6_type *elem_type; +}; + +enum sm6_type_class +{ + TYPE_CLASS_VOID, + TYPE_CLASS_INTEGER, + TYPE_CLASS_FLOAT, + TYPE_CLASS_POINTER, + TYPE_CLASS_STRUCT, + TYPE_CLASS_FUNCTION, + TYPE_CLASS_VECTOR, + TYPE_CLASS_ARRAY, + TYPE_CLASS_LABEL, + TYPE_CLASS_METADATA, +}; + +struct sm6_type +{ + enum sm6_type_class class; + union + { + unsigned int width; + struct sm6_pointer_info pointer; + struct sm6_struct_info *struc; + struct sm6_function_info *function; + struct sm6_array_info array; + } u; +}; + +enum sm6_value_type +{ + VALUE_TYPE_FUNCTION, + VALUE_TYPE_REG, +}; + +struct sm6_function_data +{ + const char *name; + bool is_prototype; + unsigned int attribs_id; +}; + +struct sm6_value +{ + const struct sm6_type *type; + enum sm6_value_type value_type; + bool is_undefined; + union + { + struct sm6_function_data function; + struct 
vkd3d_shader_register reg; + } u; +}; + +struct dxil_record +{ + unsigned int code; + unsigned int operand_count; + uint64_t operands[]; +}; + +struct sm6_symbol +{ + unsigned int id; + const char *name; +}; + +struct sm6_block +{ + struct vkd3d_shader_instruction *instructions; + size_t instruction_capacity; + size_t instruction_count; +}; + +struct sm6_function +{ + const struct sm6_value *declaration; + + struct sm6_block *blocks[1]; + size_t block_count; +}; + +struct dxil_block +{ + const struct dxil_block *parent; + enum bitcode_block_id id; + unsigned int abbrev_len; + unsigned int start; + unsigned int length; + unsigned int level; + + /* The abbrev, block and record structs are not relocatable. */ + struct dxil_abbrev **abbrevs; + size_t abbrev_capacity; + size_t abbrev_count; + unsigned int blockinfo_bid; + bool has_bid; + + struct dxil_block **child_blocks; + size_t child_block_capacity; + size_t child_block_count; + + struct dxil_record **records; + size_t record_capacity; + size_t record_count; +}; + +struct sm6_parser +{ + const uint32_t *ptr, *start, *end; + unsigned int bitpos; + + struct dxil_block root_block; + struct dxil_block *current_block; + + struct dxil_global_abbrev **abbrevs; + size_t abbrev_capacity; + size_t abbrev_count; + + struct sm6_type *types; + size_t type_count; + + struct sm6_symbol *global_symbols; + size_t global_symbol_count; + + struct sm6_function *functions; + size_t function_count; + + struct sm6_value *values; + size_t value_count; + size_t value_capacity; + + struct vkd3d_shader_parser p; +}; + +struct dxil_abbrev_operand +{ + uint64_t context; + bool (*read_operand)(struct sm6_parser *sm6, uint64_t context, uint64_t *operand); +}; + +struct dxil_abbrev +{ + unsigned int count; + bool is_array; + struct dxil_abbrev_operand operands[]; +}; + +struct dxil_global_abbrev +{ + unsigned int block_id; + struct dxil_abbrev abbrev; +}; + +static size_t size_add_with_overflow_check(size_t a, size_t b) +{ + size_t i = a + b; + 
return (i < a) ? SIZE_MAX : i; +} + +static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct sm6_parser, p); +} + +static bool sm6_parser_is_end(struct sm6_parser *sm6) +{ + return sm6->ptr == sm6->end; +} + +static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) +{ + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + return *sm6->ptr++; +} + +static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length) +{ + unsigned int l, prev_len = 0; + uint32_t bits; + + if (!length) + return 0; + + assert(length < 32); + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + + assert(sm6->bitpos < 32); + bits = *sm6->ptr >> sm6->bitpos; + l = 32 - sm6->bitpos; + if (l <= length) + { + ++sm6->ptr; + if (sm6_parser_is_end(sm6) && l < length) + { + sm6->p.failed = true; + return bits; + } + sm6->bitpos = 0; + bits |= *sm6->ptr << l; + prev_len = l; + } + sm6->bitpos += length - prev_len; + + return bits & ((1 << length) - 1); +} + +static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) +{ + unsigned int bits, flag, mask, shift = 0; + uint64_t result = 0; + + if (!length) + return 0; + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + + flag = 1 << (length - 1); + mask = flag - 1; + do + { + bits = sm6_parser_read_bits(sm6, length); + result |= (uint64_t)(bits & mask) << shift; + shift += length - 1; + } while ((bits & flag) && !sm6->p.failed && shift < 64); + + sm6->p.failed |= !!(bits & flag); + + return result; +} + +static void sm6_parser_align_32(struct sm6_parser *sm6) +{ + if (!sm6->bitpos) + return; + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return; + } + + ++sm6->ptr; + sm6->bitpos = 0; +} + +static bool dxil_block_handle_blockinfo_record(struct dxil_block *block, struct dxil_record *record) +{ + /* BLOCKINFO blocks must only occur immediately below the module root 
block. */ + if (block->level > 1) + { + WARN("Invalid blockinfo block level %u.\n", block->level); + return false; + } + + switch (record->code) + { + case SETBID: + if (!record->operand_count) + { + WARN("Missing id operand.\n"); + return false; + } + if (record->operands[0] > UINT_MAX) + WARN("Truncating block id %"PRIu64".\n", record->operands[0]); + block->blockinfo_bid = record->operands[0]; + block->has_bid = true; + break; + case BLOCKNAME: + case SETRECORDNAME: + break; + default: + FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); + break; + } + + return true; +} + +static enum vkd3d_result dxil_block_add_record(struct dxil_block *block, struct dxil_record *record) +{ + unsigned int reserve; + + switch (block->id) + { + /* Rough initial reserve sizes for small shaders. */ + case CONSTANTS_BLOCK: reserve = 32; break; + case FUNCTION_BLOCK: reserve = 128; break; + case METADATA_BLOCK: reserve = 32; break; + case TYPE_BLOCK: reserve = 32; break; + default: reserve = 8; break; + } + reserve = max(reserve, block->record_count + 1); + if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) + { + ERR("Failed to allocate %u records.\n", reserve); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (block->id == BLOCKINFO_BLOCK && !dxil_block_handle_blockinfo_record(block, record)) + return VKD3D_ERROR_INVALID_SHADER; + + block->records[block->record_count++] = record; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) +{ + struct dxil_block *block = sm6->current_block; + enum vkd3d_result ret = VKD3D_OK; + unsigned int code, count, i; + struct dxil_record *record; + + code = sm6_parser_read_vbr(sm6, 6); + + count = sm6_parser_read_vbr(sm6, 6); + if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + 
record->code = code; + record->operand_count = count; + + for (i = 0; i < count; ++i) + record->operands[i] = sm6_parser_read_vbr(sm6, 6); + if (sm6->p.failed) + ret = VKD3D_ERROR_INVALID_SHADER; + + if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) + vkd3d_free(record); + + return ret; +} + +static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = context; + return !sm6->p.failed; +} + +static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = sm6_parser_read_bits(sm6, context); + return !sm6->p.failed; +} + +static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = sm6_parser_read_vbr(sm6, context); + return !sm6->p.failed; +} + +static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; + return !sm6->p.failed; +} + +static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + int count = sm6_parser_read_vbr(sm6, 6); + sm6_parser_align_32(sm6); + for (; count > 0; count -= 4) + sm6_parser_read_uint32(sm6); + FIXME("Unhandled blob operand.\n"); + return false; +} + +static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned int count, struct sm6_parser *sm6) +{ + enum bitcode_abbrev_type prev_type, type; + unsigned int i; + + abbrev->is_array = false; + + for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) + { + if (sm6_parser_read_bits(sm6, 1)) + { + if (prev_type == ABBREV_ARRAY) + { + WARN("Unexpected literal abbreviation after array.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 8); + abbrev->operands[i].read_operand = sm6_parser_read_literal_operand; + continue; + } + + switch (type = sm6_parser_read_bits(sm6, 3)) + { + 
case ABBREV_FIXED:
+            case ABBREV_VBR:
+                abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 5);
+                abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? sm6_parser_read_fixed_operand
+                        : sm6_parser_read_vbr_operand;
+                break;
+
+            case ABBREV_ARRAY:
+                if (prev_type == ABBREV_ARRAY || i != count - 2)
+                {
+                    WARN("Unexpected array abbreviation.\n");
+                    return VKD3D_ERROR_INVALID_SHADER;
+                }
+                abbrev->is_array = true;
+                --i;
+                --count;
+                break;
+
+            case ABBREV_CHAR:
+                abbrev->operands[i].read_operand = sm6_parser_read_char6_operand;
+                break;
+
+            case ABBREV_BLOB:
+                if (prev_type == ABBREV_ARRAY || i != count - 1)
+                {
+                    WARN("Unexpected blob abbreviation.\n");
+                    return VKD3D_ERROR_INVALID_SHADER;
+                }
+                abbrev->operands[i].read_operand = sm6_parser_read_blob_operand;
+                break;
+
+            default:
+                /* The 3-bit field can hold values outside the enum. Reject
+                 * them instead of leaving read_operand uninitialised. */
+                WARN("Invalid abbreviation type %u.\n", type);
+                return VKD3D_ERROR_INVALID_SHADER;
+        }
+
+        prev_type = type;
+    }
+
+    abbrev->count = count;
+
+    return sm6->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK;
+}
+
+/* Parse an abbreviation definition found in a BLOCKINFO block and append it
+ * to sm6->abbrevs, tagged with the block id set by the preceding SETBID
+ * record. On any failure the partially built abbreviation is freed and
+ * nothing is appended. */
+static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6)
+{
+    struct dxil_block *block = sm6->current_block;
+    unsigned int count = sm6_parser_read_vbr(sm6, 5);
+    struct dxil_global_abbrev *global_abbrev;
+    enum vkd3d_result ret;
+
+    assert(block->id == BLOCKINFO_BLOCK);
+
+    if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs))
+            || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0]))))
+    {
+        ERR("Failed to allocate global abbreviation.\n");
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    }
+
+    if ((ret = dxil_abbrev_init(&global_abbrev->abbrev, count, sm6)) < 0)
+    {
+        vkd3d_free(global_abbrev);
+        return ret;
+    }
+
+    /* global_abbrev is not yet owned by sm6->abbrevs, so the error paths
+     * below have to release it themselves. */
+    if (!block->has_bid)
+    {
+        WARN("Missing blockinfo block id.\n");
+        vkd3d_free(global_abbrev);
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+    if (block->blockinfo_bid == MODULE_BLOCK)
+    {
+        FIXME("Unhandled global abbreviation for module block.\n");
+        vkd3d_free(global_abbrev);
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+    global_abbrev->block_id = block->blockinfo_bid;
+
+
sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_add_block_abbrev(struct sm6_parser *sm6) +{ + struct dxil_block *block = sm6->current_block; + struct dxil_abbrev *abbrev; + enum vkd3d_result ret; + unsigned int count; + + if (block->id == BLOCKINFO_BLOCK) + return sm6_parser_add_global_abbrev(sm6); + + count = sm6_parser_read_vbr(sm6, 5); + if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) + || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) + { + ERR("Failed to allocate block abbreviation.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = dxil_abbrev_init(abbrev, count, sm6)) < 0) + { + vkd3d_free(abbrev); + return ret; + } + + block->abbrevs[block->abbrev_count++] = abbrev; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_read_abbrev_record(struct sm6_parser *sm6, unsigned int abbrev_id) +{ + enum vkd3d_result ret = VKD3D_ERROR_INVALID_SHADER; + struct dxil_block *block = sm6->current_block; + struct dxil_record *temp, *record; + unsigned int i, count, array_len; + struct dxil_abbrev *abbrev; + uint64_t code; + + if (abbrev_id >= block->abbrev_count) + { + WARN("Invalid abbreviation id %u.\n", abbrev_id); + return VKD3D_ERROR_INVALID_SHADER; + } + + abbrev = block->abbrevs[abbrev_id]; + if (!(count = abbrev->count)) + return VKD3D_OK; + if (count == 1 && abbrev->is_array) + return VKD3D_ERROR_INVALID_SHADER; + + /* First operand is the record code. The array is included in the count, but will be done separately. 
*/ + count -= abbrev->is_array + 1; + if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (!abbrev->operands[0].read_operand(sm6, abbrev->operands[0].context, &code)) + goto fail; + if (code > UINT_MAX) + FIXME("Truncating 64-bit record code %#"PRIx64".\n", code); + record->code = code; + + for (i = 0; i < count; ++i) + if (!abbrev->operands[i + 1].read_operand(sm6, abbrev->operands[i + 1].context, &record->operands[i])) + goto fail; + record->operand_count = count; + + /* An array can occur only as the last operand. */ + if (abbrev->is_array) + { + array_len = sm6_parser_read_vbr(sm6, 6); + if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count + array_len); + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + record = temp; + + for (i = 0; i < array_len; ++i) + { + if (!abbrev->operands[count + 1].read_operand(sm6, abbrev->operands[count + 1].context, + &record->operands[count + i])) + { + goto fail; + } + } + record->operand_count += array_len; + } + + if ((ret = dxil_block_add_record(block, record)) < 0) + goto fail; + + return VKD3D_OK; + +fail: + vkd3d_free(record); + return ret; +} + +static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, + struct sm6_parser *sm6); + +static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_parser *sm6) +{ + unsigned int reserve = (parent->id == MODULE_BLOCK) ? 
12 : 2; + struct dxil_block *block; + enum vkd3d_result ret; + + sm6->current_block = parent; + + do + { + unsigned int abbrev_id = sm6_parser_read_bits(sm6, parent->abbrev_len); + + switch (abbrev_id) + { + case END_BLOCK: + sm6_parser_align_32(sm6); + return VKD3D_OK; + + case ENTER_SUBBLOCK: + if (parent->id != MODULE_BLOCK && parent->id != FUNCTION_BLOCK) + { + WARN("Invalid subblock parent id %u.\n", parent->id); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!vkd3d_array_reserve((void **)&parent->child_blocks, &parent->child_block_capacity, + max(reserve, parent->child_block_count + 1), sizeof(*parent->child_blocks)) + || !(block = vkd3d_calloc(1, sizeof(*block)))) + { + ERR("Failed to allocate block.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = dxil_block_init(block, parent, sm6)) < 0) + { + vkd3d_free(block); + return ret; + } + + parent->child_blocks[parent->child_block_count++] = block; + sm6->current_block = parent; + break; + + case DEFINE_ABBREV: + if ((ret = sm6_parser_add_block_abbrev(sm6)) < 0) + return ret; + break; + + case UNABBREV_RECORD: + if ((ret = sm6_parser_read_unabbrev_record(sm6)) < 0) + { + WARN("Failed to read unabbreviated record.\n"); + return ret; + } + break; + + default: + if ((ret = sm6_parser_read_abbrev_record(sm6, abbrev_id - 4)) < 0) + { + WARN("Failed to read abbreviated record.\n"); + return ret; + } + break; + } + } while (!sm6->p.failed); + + return VKD3D_ERROR_INVALID_SHADER; +} + +static size_t sm6_parser_compute_global_abbrev_count_for_block_id(struct sm6_parser *sm6, + unsigned int block_id) +{ + size_t i, count; + + for (i = 0, count = 0; i < sm6->abbrev_count; ++i) + count += sm6->abbrevs[i]->block_id == block_id; + + return count; +} + +static void dxil_block_destroy(struct dxil_block *block) +{ + size_t i; + + for (i = 0; i < block->record_count; ++i) + vkd3d_free(block->records[i]); + vkd3d_free(block->records); + + for (i = 0; i < block->child_block_count; ++i) + { + 
dxil_block_destroy(block->child_blocks[i]); + vkd3d_free(block->child_blocks[i]); + } + vkd3d_free(block->child_blocks); + + block->records = NULL; + block->record_count = 0; + block->child_blocks = NULL; + block->child_block_count = 0; +} + +static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, + struct sm6_parser *sm6) +{ + size_t i, abbrev_count = 0; + enum vkd3d_result ret; + + block->parent = parent; + block->level = parent ? parent->level + 1 : 0; + block->id = sm6_parser_read_vbr(sm6, 8); + block->abbrev_len = sm6_parser_read_vbr(sm6, 4); + sm6_parser_align_32(sm6); + block->length = sm6_parser_read_uint32(sm6); + block->start = sm6->ptr - sm6->start; + + if (sm6->p.failed) + return VKD3D_ERROR_INVALID_SHADER; + + if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) + { + if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, + block->abbrev_count, sizeof(*block->abbrevs))) + { + ERR("Failed to allocate block abbreviations.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < sm6->abbrev_count; ++i) + if (sm6->abbrevs[i]->block_id == block->id) + block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; + + assert(abbrev_count == block->abbrev_count); + } + + if ((ret = dxil_block_read(block, sm6)) < 0) + dxil_block_destroy(block); + + for (i = abbrev_count; i < block->abbrev_count; ++i) + vkd3d_free(block->abbrevs[i]); + vkd3d_free(block->abbrevs); + block->abbrevs = NULL; + block->abbrev_count = 0; + + return ret; +} + +static size_t dxil_block_compute_function_count(const struct dxil_block *root) +{ + size_t i, count; + + for (i = 0, count = 0; i < root->child_block_count; ++i) + count += root->child_blocks[i]->id == FUNCTION_BLOCK; + + return count; +} + +static size_t dxil_block_compute_module_decl_count(const struct dxil_block *block) +{ + size_t i, count; + + for (i = 0, count = 0; i < block->record_count; ++i) + count += 
block->records[i]->code == MODULE_CODE_FUNCTION; + return count; +} + +static size_t dxil_block_compute_constants_count(const struct dxil_block *block) +{ + size_t i, count; + + for (i = 0, count = 0; i < block->record_count; ++i) + count += block->records[i]->code != CST_CODE_SETTYPE; + return count; +} + +static void dxil_global_abbrevs_cleanup(struct dxil_global_abbrev **abbrevs, size_t count) +{ + size_t i; + + for (i = 0; i < count; ++i) + vkd3d_free(abbrevs[i]); + vkd3d_free(abbrevs); +} + +static const struct dxil_block *sm6_parser_get_level_one_block(const struct sm6_parser *sm6, + enum bitcode_block_id id, bool *is_unique) +{ + const struct dxil_block *block, *found = NULL; + size_t i; + + for (i = 0, *is_unique = true; i < sm6->root_block.child_block_count; ++i) + { + block = sm6->root_block.child_blocks[i]; + if (block->id != id) + continue; + + if (!found) + found = block; + else + *is_unique = false; + } + + return found; +} + +static char *dxil_record_to_string(const struct dxil_record *record, unsigned int offset) +{ + unsigned int i; + char *str; + + assert(offset <= record->operand_count); + if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) + return NULL; + + for (i = offset; i < record->operand_count; ++i) + str[i - offset] = record->operands[i]; + + return str; +} + +static bool dxil_record_validate_operand_min_count(const struct dxil_record *record, unsigned int min_count, + struct sm6_parser *sm6) +{ + if (record->operand_count >= min_count) + return true; + + WARN("Invalid operand count %u for code %u.\n", record->operand_count, record->code); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, + "Invalid operand count %u for record code %u.", record->operand_count, record->code); + return false; +} + +static void dxil_record_validate_operand_max_count(const struct dxil_record *record, unsigned int max_count, + struct sm6_parser *sm6) +{ + if (record->operand_count <= max_count) + return; + + 
WARN("Ignoring %u extra operands for code %u.\n", record->operand_count - max_count, record->code); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Ignoring %u extra operands for record code %u.", record->operand_count - max_count, record->code); +} + +static bool dxil_record_validate_operand_count(const struct dxil_record *record, unsigned int min_count, + unsigned int max_count, struct sm6_parser *sm6) +{ + dxil_record_validate_operand_max_count(record, max_count, sm6); + return dxil_record_validate_operand_min_count(record, min_count, sm6); +} + +static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) +{ + const struct dxil_record *record; + size_t i, type_count, type_index; + const struct dxil_block *block; + char *struct_name = NULL; + unsigned int j, count; + struct sm6_type *type; + uint64_t type_id; + bool is_unique; + + sm6->p.location.line = 0; + sm6->p.location.column = 0; + + if (!(block = sm6_parser_get_level_one_block(sm6, TYPE_BLOCK, &is_unique))) + { + WARN("No type definitions found.\n"); + return VKD3D_OK; + } + if (!is_unique) + WARN("Ignoring invalid extra type table(s).\n"); + + sm6->p.location.line = block->id; + + type_count = 0; + for (i = 0; i < block->record_count; ++i) + type_count += block->records[i]->code != TYPE_CODE_NUMENTRY && block->records[i]->code != TYPE_CODE_STRUCT_NAME; + + /* The type array must not be relocated. 
*/ + if (!(sm6->types = vkd3d_calloc(type_count, sizeof(*sm6->types)))) + { + ERR("Failed to allocate type array.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; + + type = &sm6->types[sm6->type_count]; + type_index = sm6->type_count; + + switch (record->code) + { + case TYPE_CODE_ARRAY: + case TYPE_CODE_VECTOR: + if (!dxil_record_validate_operand_count(record, 2, 2, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + type->class = record->code == TYPE_CODE_ARRAY ? TYPE_CLASS_ARRAY : TYPE_CLASS_VECTOR; + + if (!(type->u.array.count = record->operands[0])) + { + TRACE("Setting unbounded for type %zu.\n", type_index); + type->u.array.count = UINT_MAX; + } + + if ((type_id = record->operands[1]) >= type_count) + { + WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.array.elem_type = &sm6->types[type_id]; + break; + + case TYPE_CODE_DOUBLE: + dxil_record_validate_operand_max_count(record, 0, sm6); + type->class = TYPE_CLASS_FLOAT; + type->u.width = 64; + break; + + case TYPE_CODE_FLOAT: + dxil_record_validate_operand_max_count(record, 0, sm6); + type->class = TYPE_CLASS_FLOAT; + type->u.width = 32; + break; + + case TYPE_CODE_FUNCTION: + if (!dxil_record_validate_operand_min_count(record, 2, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + if (record->operands[0]) + FIXME("Unhandled vararg function type %zu.\n", type_index); + + type->class = TYPE_CLASS_FUNCTION; + + if ((type_id = record->operands[1]) >= type_count) + { + WARN("Invalid return type id %"PRIu64" for type %zu.\n", type_id, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + + count = record->operand_count - 2; + if (vkd3d_object_range_overflow(sizeof(type->u.function), count, sizeof(type->u.function->param_types[0])) + || !(type->u.function = vkd3d_malloc(offsetof(struct sm6_function_info, param_types[count])))) + { + 
ERR("Failed to allocate function parameter types.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + type->u.function->ret_type = &sm6->types[type_id]; + type->u.function->param_count = count; + for (j = 0; j < count; ++j) + { + if ((type_id = record->operands[j + 2]) >= type_count) + { + WARN("Invalid parameter type id %"PRIu64" for type %zu.\n", type_id, type_index); + vkd3d_free(type->u.function); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.function->param_types[j] = &sm6->types[type_id]; + } + break; + + case TYPE_CODE_HALF: + dxil_record_validate_operand_max_count(record, 0, sm6); + type->class = TYPE_CLASS_FLOAT; + type->u.width = 16; + break; + + case TYPE_CODE_INTEGER: + { + uint64_t width; + + if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + type->class = TYPE_CLASS_INTEGER; + + switch ((width = record->operands[0])) + { + case 1: + case 8: + case 16: + case 32: + case 64: + break; + default: + WARN("Invalid integer width %"PRIu64" for type %zu.\n", width, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.width = width; + break; + } + + case TYPE_CODE_LABEL: + type->class = TYPE_CLASS_LABEL; + break; + + case TYPE_CODE_METADATA: + type->class = TYPE_CLASS_METADATA; + break; + + case TYPE_CODE_NUMENTRY: + continue; + + case TYPE_CODE_POINTER: + if (!dxil_record_validate_operand_count(record, 1, 2, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + type->class = TYPE_CLASS_POINTER; + + if ((type_id = record->operands[0]) >= type_count) + { + WARN("Invalid pointee type id %"PRIu64" for type %zu.\n", type_id, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.pointer.type = &sm6->types[type_id]; + type->u.pointer.addr_space = (record->operand_count > 1) ? 
record->operands[1] : ADDRESS_SPACE_DEFAULT; + break; + + case TYPE_CODE_STRUCT_ANON: + case TYPE_CODE_STRUCT_NAMED: + if (!dxil_record_validate_operand_min_count(record, 2, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + if (record->code == TYPE_CODE_STRUCT_NAMED && !struct_name) + { + WARN("Missing struct name before struct type %zu.\n", type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + + type->class = TYPE_CLASS_STRUCT; + + count = record->operand_count - 1; + if (vkd3d_object_range_overflow(sizeof(type->u.struc), count, sizeof(type->u.struc->elem_types[0])) + || !(type->u.struc = vkd3d_malloc(offsetof(struct sm6_struct_info, elem_types[count])))) + { + ERR("Failed to allocate struct element types.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (record->operands[0]) + FIXME("Ignoring struct packed attribute.\n"); + + type->u.struc->elem_count = count; + for (j = 0; j < count; ++j) + { + if ((type_id = record->operands[j + 1]) >= type_count) + { + WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); + vkd3d_free(type->u.struc); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.struc->elem_types[j] = &sm6->types[type_id]; + } + + if (record->code == TYPE_CODE_STRUCT_ANON) + { + type->u.struc->name = NULL; + break; + } + + type->u.struc->name = struct_name; + struct_name = NULL; + break; + + case TYPE_CODE_STRUCT_NAME: + if (!(struct_name = dxil_record_to_string(record, 0))) + { + ERR("Failed to allocate struct name.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + if (!struct_name[0]) + WARN("Struct name is empty for type %zu.\n", type_index); + continue; + + case TYPE_CODE_VOID: + dxil_record_validate_operand_max_count(record, 0, sm6); + type->class = TYPE_CLASS_VOID; + break; + + default: + FIXME("Unhandled type %u at index %zu.\n", record->code, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + ++sm6->type_count; + } + + assert(sm6->type_count == type_count); + + if (struct_name) + { + WARN("Unused struct name %s.\n", 
struct_name); + vkd3d_free(struct_name); + } + + return VKD3D_OK; +} + +static inline bool sm6_type_is_void(const struct sm6_type *type) +{ + return type->class == TYPE_CLASS_VOID; +} + +static inline bool sm6_type_is_integer(const struct sm6_type *type) +{ + return type->class == TYPE_CLASS_INTEGER; +} + +static inline bool sm6_type_is_floating_point(const struct sm6_type *type) +{ + return type->class == TYPE_CLASS_FLOAT; +} + +static inline bool sm6_type_is_numeric(const struct sm6_type *type) +{ + return type->class == TYPE_CLASS_INTEGER || type->class == TYPE_CLASS_FLOAT; +} + +static inline bool sm6_type_is_pointer(const struct sm6_type *type) +{ + return type->class == TYPE_CLASS_POINTER; +} + +static bool sm6_type_is_numeric_aggregate(const struct sm6_type *type) +{ + unsigned int i; + + switch (type->class) + { + case TYPE_CLASS_ARRAY: + case TYPE_CLASS_VECTOR: + return sm6_type_is_numeric(type->u.array.elem_type); + + case TYPE_CLASS_STRUCT: + /* Do not handle nested structs. Support can be added if they show up. */ + for (i = 0; i < type->u.struc->elem_count; ++i) + if (!sm6_type_is_numeric(type->u.struc->elem_types[i])) + return false; + return true; + + default: + return false; + } +} + +static inline bool sm6_type_is_struct(const struct sm6_type *type) +{ + return type->class == TYPE_CLASS_STRUCT; +} + +static inline bool sm6_type_is_function(const struct sm6_type *type) +{ + return type->class == TYPE_CLASS_FUNCTION; +} + +static inline bool sm6_type_is_function_pointer(const struct sm6_type *type) +{ + return sm6_type_is_pointer(type) && sm6_type_is_function(type->u.pointer.type); +} + +static inline bool sm6_type_is_handle(const struct sm6_type *type) +{ + return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Handle"); +} + +static inline const struct sm6_type *sm6_type_get_element_type(const struct sm6_type *type) +{ + return (type->class == TYPE_CLASS_ARRAY || type->class == TYPE_CLASS_VECTOR) ? 
type->u.array.elem_type : type; +} + +static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type *type, + enum bitcode_address_space addr_space, struct sm6_parser *sm6) +{ + size_t i, start = type - sm6->types; + const struct sm6_type *pointer_type; + + /* DXC seems usually to place the pointer type immediately after its pointee. */ + for (i = (start + 1) % sm6->type_count; i != start; i = (i + 1) % sm6->type_count) + { + pointer_type = &sm6->types[i]; + if (sm6_type_is_pointer(pointer_type) && pointer_type->u.pointer.type == type + && pointer_type->u.pointer.addr_space == addr_space) + return pointer_type; + } + + return NULL; +} + +static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64_t type_id) +{ + if (type_id >= sm6->type_count) + { + WARN("Invalid type index %"PRIu64" at %zu.\n", type_id, sm6->value_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID, + "DXIL type id %"PRIu64" is invalid.", type_id); + return NULL; + } + return &sm6->types[type_id]; +} + +static int global_symbol_compare(const void *a, const void *b) +{ + return vkd3d_u32_compare(((const struct sm6_symbol *)a)->id, ((const struct sm6_symbol *)b)->id); +} + +static enum vkd3d_result sm6_parser_symtab_init(struct sm6_parser *sm6) +{ + const struct dxil_record *record; + const struct dxil_block *block; + struct sm6_symbol *symbol; + size_t i, count; + bool is_unique; + + sm6->p.location.line = 0; + sm6->p.location.column = 0; + + if (!(block = sm6_parser_get_level_one_block(sm6, VALUE_SYMTAB_BLOCK, &is_unique))) + { + /* There should always be at least one symbol: the name of the entry point function. 
*/ + WARN("No value symtab block found.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + if (!is_unique) + FIXME("Ignoring extra value symtab block(s).\n"); + + sm6->p.location.line = block->id; + + for (i = 0, count = 0; i < block->record_count; ++i) + count += block->records[i]->code == VST_CODE_ENTRY; + + if (!(sm6->global_symbols = vkd3d_calloc(count, sizeof(*sm6->global_symbols)))) + { + ERR("Failed to allocate global symbols.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; + + if (record->code != VST_CODE_ENTRY) + { + FIXME("Unhandled symtab code %u.\n", record->code); + continue; + } + if (!dxil_record_validate_operand_min_count(record, 1, sm6)) + continue; + + symbol = &sm6->global_symbols[sm6->global_symbol_count]; + symbol->id = record->operands[0]; + if (!(symbol->name = dxil_record_to_string(record, 1))) + { + ERR("Failed to allocate symbol name.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ++sm6->global_symbol_count; + } + + sm6->p.location.column = block->record_count; + + qsort(sm6->global_symbols, sm6->global_symbol_count, sizeof(*sm6->global_symbols), global_symbol_compare); + for (i = 1; i < sm6->global_symbol_count; ++i) + { + if (sm6->global_symbols[i].id == sm6->global_symbols[i - 1].id) + { + WARN("Invalid duplicate symbol id %u.\n", sm6->global_symbols[i].id); + return VKD3D_ERROR_INVALID_SHADER; + } + } + + return VKD3D_OK; +} + +static const char *sm6_parser_get_global_symbol_name(const struct sm6_parser *sm6, size_t id) +{ + size_t i, start; + + /* id == array index is normally true */ + i = start = id % sm6->global_symbol_count; + do + { + if (sm6->global_symbols[i].id == id) + return sm6->global_symbols[i].name; + i = (i + 1) % sm6->global_symbol_count; + } while (i != start); + + return NULL; +} + +static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) +{ + assert(fn->value_type == VALUE_TYPE_FUNCTION); + 
return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); +} + +static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) +{ + assert(sm6->value_count < sm6->value_capacity); + return &sm6->values[sm6->value_count]; +} + +static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) +{ + if (type->class == TYPE_CLASS_INTEGER) + { + switch (type->u.width) + { + case 8: + return VKD3D_DATA_UINT8; + case 32: + return VKD3D_DATA_UINT; + default: + FIXME("Unhandled width %u.\n", type->u.width); + return VKD3D_DATA_UINT; + } + } + else if (type->class == TYPE_CLASS_FLOAT) + { + switch (type->u.width) + { + case 32: + return VKD3D_DATA_FLOAT; + case 64: + return VKD3D_DATA_DOUBLE; + default: + FIXME("Unhandled width %u.\n", type->u.width); + return VKD3D_DATA_FLOAT; + } + } + + FIXME("Unhandled type %u.\n", type->class); + return VKD3D_DATA_UINT; +} + +/* Recurse through the block tree while maintaining a current value count. The current + * count is the sum of the global count plus all declarations within the current function. + * Store into value_capacity the highest count seen. */ +static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, + const struct dxil_block *block, size_t value_count) +{ + size_t i, old_value_count = value_count; + + if (block->id == MODULE_BLOCK) + value_count = size_add_with_overflow_check(value_count, dxil_block_compute_module_decl_count(block)); + + for (i = 0; i < block->child_block_count; ++i) + value_count = sm6_parser_compute_max_value_count(sm6, block->child_blocks[i], value_count); + + switch (block->id) + { + case CONSTANTS_BLOCK: + /* Function local constants are contained in a child block of the function block. */ + value_count = size_add_with_overflow_check(value_count, dxil_block_compute_constants_count(block)); + break; + case FUNCTION_BLOCK: + /* A function must start with a block count, which emits no value. 
This formula is likely to + * overestimate the value count somewhat, but this should be no problem. */ + value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); + sm6->value_capacity = max(sm6->value_capacity, value_count); + /* The value count returns to its previous value after handling a function. */ + if (value_count < SIZE_MAX) + value_count = old_value_count; + break; + default: + break; + } + + return value_count; +} + +static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record) +{ + const unsigned int max_count = 15; + const struct sm6_type *ret_type; + struct sm6_value *fn; + unsigned int i, j; + + if (!dxil_record_validate_operand_count(record, 8, max_count, sm6)) + return false; + + fn = sm6_parser_get_current_value(sm6); + fn->value_type = VALUE_TYPE_FUNCTION; + if (!(fn->u.function.name = sm6_parser_get_global_symbol_name(sm6, sm6->value_count))) + { + WARN("Missing symbol name for function %zu.\n", sm6->value_count); + fn->u.function.name = ""; + } + + if (!(fn->type = sm6_parser_get_type(sm6, record->operands[0]))) + return false; + if (!sm6_type_is_function(fn->type)) + { + WARN("Type is not a function.\n"); + return false; + } + ret_type = fn->type->u.function->ret_type; + + if (!(fn->type = sm6_type_get_pointer_to_type(fn->type, ADDRESS_SPACE_DEFAULT, sm6))) + { + WARN("Failed to get pointer type for type %u.\n", fn->type->class); + return false; + } + + if (record->operands[1]) + WARN("Ignoring calling convention %#"PRIx64".\n", record->operands[1]); + + fn->u.function.is_prototype = !!record->operands[2]; + + if (record->operands[3]) + WARN("Ignoring linkage %#"PRIx64".\n", record->operands[3]); + + if (record->operands[4] > UINT_MAX) + WARN("Invalid attributes id %#"PRIx64".\n", record->operands[4]); + /* 1-based index. */ + if ((fn->u.function.attribs_id = record->operands[4])) + TRACE("Ignoring function attributes.\n"); + + /* These always seem to be zero. 
*/ + for (i = 5, j = 0; i < min(record->operand_count, max_count); ++i) + j += !!record->operands[i]; + if (j) + WARN("Ignoring %u operands.\n", j); + + if (sm6_value_is_dx_intrinsic_dcl(fn) && !sm6_type_is_void(ret_type) && !sm6_type_is_numeric(ret_type) + && !sm6_type_is_numeric_aggregate(ret_type) && !sm6_type_is_handle(ret_type)) + { + WARN("Unexpected return type for dx intrinsic function '%s'.\n", fn->u.function.name); + } + + ++sm6->value_count; + + return true; +} + +static inline uint64_t decode_rotated_signed_value(uint64_t value) +{ + if (value != 1) + { + bool neg = value & 1; + value >>= 1; + return neg ? -value : value; + } + return value << 63; +} + +static inline float bitcast_uint64_to_float(uint64_t value) +{ + union + { + uint32_t uint32_value; + float float_value; + } u; + + u.uint32_value = value; + return u.float_value; +} + +static inline double bitcast_uint64_to_double(uint64_t value) +{ + union + { + uint64_t uint64_value; + double double_value; + } u; + + u.uint64_value = value; + return u.double_value; +} + +static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) +{ + enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; + const struct sm6_type *type, *elem_type; + enum vkd3d_data_type reg_data_type; + const struct dxil_record *record; + struct sm6_value *dst; + size_t i, value_idx; + uint64_t value; + + for (i = 0, type = NULL; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; + value_idx = sm6->value_count; + + if (record->code == CST_CODE_SETTYPE) + { + if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + if (!(type = sm6_parser_get_type(sm6, record->operands[0]))) + return VKD3D_ERROR_INVALID_SHADER; + + elem_type = sm6_type_get_element_type(type); + if (sm6_type_is_numeric(elem_type)) + { + reg_data_type = vkd3d_data_type_from_sm6_type(elem_type); + reg_type = elem_type->u.width > 32 ? 
VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST; + } + else + { + reg_data_type = VKD3D_DATA_UNUSED; + reg_type = VKD3DSPR_INVALID; + } + + if (i == block->record_count - 1) + WARN("Unused SETTYPE record.\n"); + + continue; + } + + if (!type) + { + WARN("Constant record %zu has no type.\n", value_idx); + return VKD3D_ERROR_INVALID_SHADER; + } + + dst = sm6_parser_get_current_value(sm6); + dst->type = type; + dst->value_type = VALUE_TYPE_REG; + dst->u.reg.type = reg_type; + dst->u.reg.immconst_type = VKD3D_IMMCONST_SCALAR; + dst->u.reg.data_type = reg_data_type; + + switch (record->code) + { + case CST_CODE_NULL: + /* Register constant data is already zero-filled. */ + break; + + case CST_CODE_INTEGER: + if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + if (!sm6_type_is_integer(type)) + { + WARN("Invalid integer of non-integer type %u at constant idx %zu.\n", type->class, value_idx); + return VKD3D_ERROR_INVALID_SHADER; + } + + value = decode_rotated_signed_value(record->operands[0]); + if (type->u.width <= 32) + dst->u.reg.u.immconst_uint[0] = value & ((1ull << type->u.width) - 1); + else + dst->u.reg.u.immconst_uint64[0] = value; + + break; + + case CST_CODE_FLOAT: + if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + if (!sm6_type_is_floating_point(type)) + { + WARN("Invalid float of non-fp type %u at constant idx %zu.\n", type->class, value_idx); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (type->u.width == 16) + FIXME("Half float type is not supported yet.\n"); + else if (type->u.width == 32) + dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); + else if (type->u.width == 64) + dst->u.reg.u.immconst_double[0] = bitcast_uint64_to_double(record->operands[0]); + else + vkd3d_unreachable(); + + break; + + case CST_CODE_DATA: + WARN("Unhandled constant array.\n"); + break; + + case CST_CODE_UNDEF: + 
dxil_record_validate_operand_max_count(record, 0, sm6); + dst->u.reg.type = VKD3DSPR_UNDEF; + /* Mark as explicitly undefined, not the result of a missing constant code or instruction. */ + dst->is_undefined = true; + break; + + default: + FIXME("Unhandled constant code %u.\n", record->code); + dst->u.reg.type = VKD3DSPR_UNDEF; + break; + } + + ++sm6->value_count; + } + + return VKD3D_OK; +} + +static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) +{ + if (!shader_instruction_array_reserve(&sm6->p.instructions, sm6->p.instructions.count + extra)) + { + ERR("Failed to allocate instruction.\n"); + return NULL; + } + return &sm6->p.instructions.elements[sm6->p.instructions.count]; +} + +/* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ +static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, + enum vkd3d_shader_opcode handler_idx) +{ + struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); + assert(ins); + shader_instruction_init(ins, handler_idx); + ++sm6->p.instructions.count; + return ins; +} + +static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) +{ + const struct dxil_block *block = &sm6->root_block; + const struct dxil_record *record; + uint64_t version; + size_t i; + + sm6->p.location.line = block->id; + sm6->p.location.column = 0; + + for (i = 0; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; + switch (record->code) + { + case MODULE_CODE_FUNCTION: + if (!sm6_parser_declare_function(sm6, record)) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL, + "A DXIL function declaration is invalid."); + return VKD3D_ERROR_INVALID_SHADER; + } + break; + + case MODULE_CODE_GLOBALVAR: + FIXME("Global variables are not implemented yet.\n"); + break; + + case MODULE_CODE_VERSION: + if 
(!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + if ((version = record->operands[0]) != 1) + { + FIXME("Unsupported format version %#"PRIx64".\n", version); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT, + "Bitcode format version %#"PRIx64" is unsupported.", version); + return VKD3D_ERROR_INVALID_SHADER; + } + break; + + default: + break; + } + } + + return VKD3D_OK; +} + +static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) +{ + size_t i, count = sm6->function_count; + + for (i = 0; i < sm6->value_count; ++i) + { + if (sm6_type_is_function_pointer(sm6->values[i].type) && !sm6->values[i].u.function.is_prototype && !count--) + break; + } + if (i == sm6->value_count) + return NULL; + + ++sm6->function_count; + return &sm6->values[i]; +} + +static struct sm6_block *sm6_block_create() +{ + struct sm6_block *block = vkd3d_calloc(1, sizeof(*block)); + return block; +} + +static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record *record, + struct sm6_block *code_block, struct vkd3d_shader_instruction *ins) +{ + if (!dxil_record_validate_operand_count(record, 0, 1, sm6)) + return; + + if (record->operand_count) + FIXME("Non-void return is not implemented.\n"); + + ins->handler_idx = VKD3DSIH_NOP; +} + +static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, + struct sm6_function *function) +{ + struct vkd3d_shader_instruction *ins; + const struct dxil_record *record; + struct sm6_block *code_block; + struct sm6_value *dst; + size_t i, block_idx; + bool ret_found; + enum + { + RESULT_VALUE, + RESULT_TERMINATE, + } result_type; + + if (sm6->function_count) + { + FIXME("Multiple functions are not supported yet.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + if (!(function->declaration = sm6_parser_next_function_definition(sm6))) + { + WARN("Failed to find definition to match 
function body.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (block->record_count < 2) + { + /* It should contain at least a block count and a RET instruction. */ + WARN("Invalid function block record count %zu.\n", block->record_count); + return VKD3D_ERROR_INVALID_SHADER; + } + if (block->records[0]->code != FUNC_CODE_DECLAREBLOCKS || !block->records[0]->operand_count + || block->records[0]->operands[0] > UINT_MAX) + { + WARN("Block count declaration not found or invalid.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!(function->block_count = block->records[0]->operands[0])) + { + WARN("Function contains no blocks.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + if (function->block_count > 1) + { + FIXME("Branched shaders are not supported yet.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!(function->blocks[0] = sm6_block_create())) + { + ERR("Failed to allocate code block.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + code_block = function->blocks[0]; + + for (i = 1, block_idx = 0, ret_found = false; i < block->record_count; ++i) + { + sm6->p.location.column = i; + + /* block->record_count - 1 is the instruction count, but some instructions + * can emit >1 IR instruction, so extra may be used. 
*/ + if (!vkd3d_array_reserve((void **)&code_block->instructions, &code_block->instruction_capacity, + max(code_block->instruction_count + 1, block->record_count), sizeof(*code_block->instructions))) + { + ERR("Failed to allocate instructions.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + ins = &code_block->instructions[code_block->instruction_count]; + ins->handler_idx = VKD3DSIH_INVALID; + + dst = sm6_parser_get_current_value(sm6); + dst->type = NULL; + dst->value_type = VALUE_TYPE_REG; + result_type = RESULT_VALUE; + + record = block->records[i]; + switch (record->code) + { + case FUNC_CODE_INST_RET: + sm6_parser_emit_ret(sm6, record, code_block, ins); + result_type = RESULT_TERMINATE; + ret_found = true; + break; + default: + FIXME("Unhandled dxil instruction %u.\n", record->code); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (result_type == RESULT_TERMINATE) + { + ++block_idx; + code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; + } + if (code_block) + code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; + else + assert(ins->handler_idx == VKD3DSIH_NOP); + sm6->value_count += !!dst->type; + } + + if (!ret_found) + { + WARN("Function contains no RET instruction.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + return VKD3D_OK; +} + +static bool sm6_block_emit_instructions(struct sm6_block *block, struct sm6_parser *sm6) +{ + struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, block->instruction_count + 1); + + if (!ins) + return false; + + memcpy(ins, block->instructions, block->instruction_count * sizeof(*block->instructions)); + sm6->p.instructions.count += block->instruction_count; + + sm6_parser_add_instruction(sm6, VKD3DSIH_RET); + + return true; +} + +static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const struct dxil_block *block, + unsigned int level) +{ + size_t i, old_value_count = sm6->value_count; + struct sm6_function *function; + enum vkd3d_result ret; 
+ + for (i = 0; i < block->child_block_count; ++i) + { + if ((ret = sm6_parser_module_init(sm6, block->child_blocks[i], level + 1)) < 0) + return ret; + } + + sm6->p.location.line = block->id; + sm6->p.location.column = 0; + + switch (block->id) + { + case CONSTANTS_BLOCK: + return sm6_parser_constants_init(sm6, block); + + case FUNCTION_BLOCK: + function = &sm6->functions[sm6->function_count]; + if ((ret = sm6_parser_function_init(sm6, block, function)) < 0) + return ret; + /* The value index returns to its previous value after handling a function. It's usually nonzero + * at the start because of global constants/variables/function declarations. Function constants + * occur in a child block, so value_count is already saved before they are emitted. */ + memset(&sm6->values[old_value_count], 0, (sm6->value_count - old_value_count) * sizeof(*sm6->values)); + sm6->value_count = old_value_count; + break; + + case BLOCKINFO_BLOCK: + case MODULE_BLOCK: + case PARAMATTR_BLOCK: + case PARAMATTR_GROUP_BLOCK: + case VALUE_SYMTAB_BLOCK: + case METADATA_BLOCK: + case METADATA_ATTACHMENT_BLOCK: + case TYPE_BLOCK: + break; + + default: + FIXME("Unhandled block id %u.\n", block->id); + break; + } + + return VKD3D_OK; +} + +static void sm6_type_table_cleanup(struct sm6_type *types, size_t count) +{ + size_t i; + + if (!types) + return; + + for (i = 0; i < count; ++i) + { + switch (types[i].class) + { + case TYPE_CLASS_STRUCT: + vkd3d_free((void *)types[i].u.struc->name); + vkd3d_free(types[i].u.struc); + break; + case TYPE_CLASS_FUNCTION: + vkd3d_free(types[i].u.function); + break; + default: + break; + } + } + + vkd3d_free(types); +} + +static void sm6_symtab_cleanup(struct sm6_symbol *symbols, size_t count) +{ + size_t i; + + for (i = 0; i < count; ++i) + vkd3d_free((void *)symbols[i].name); + vkd3d_free(symbols); +} + +static void sm6_block_destroy(struct sm6_block *block) +{ + vkd3d_free(block->instructions); + vkd3d_free(block); +} + +static void sm6_functions_cleanup(struct 
sm6_function *functions, size_t count) +{ + size_t i, j; + + for (i = 0; i < count; ++i) + { + for (j = 0; j < functions[i].block_count; ++j) + sm6_block_destroy(functions[i].blocks[j]); + } + vkd3d_free(functions); +} + +static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) +{ + struct sm6_parser *sm6 = sm6_parser(parser); + + dxil_block_destroy(&sm6->root_block); + dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); + shader_instruction_array_destroy(&parser->instructions); + sm6_type_table_cleanup(sm6->types, sm6->type_count); + sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); + sm6_functions_cleanup(sm6->functions, sm6->function_count); + vkd3d_free(sm6->values); + free_shader_desc(&parser->shader_desc); + vkd3d_free(sm6); +} + +static const struct vkd3d_shader_parser_ops sm6_parser_ops = +{ + .parser_destroy = sm6_parser_destroy, +}; + +static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, + const char *source_name, struct vkd3d_shader_message_context *message_context) +{ + const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; + unsigned int chunk_offset, chunk_size; + size_t count, length, function_count; + enum bitcode_block_abbreviation abbr; + struct vkd3d_shader_version version; + struct dxil_block *block; + enum vkd3d_result ret; + unsigned int i; + + count = byte_code_size / sizeof(*byte_code); + if (count < 6) + { + WARN("Invalid data size %zu.\n", byte_code_size); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, + "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); + return VKD3D_ERROR_INVALID_SHADER; + } + + version_token = byte_code[0]; + TRACE("Compiler version: 0x%08x.\n", version_token); + token_count = byte_code[1]; + TRACE("Token count: %u.\n", token_count); + + if (token_count < 6 || count < 
token_count) + { + WARN("Invalid token count %u (word count %zu).\n", token_count, count); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, + "DXIL chunk token count %#x is invalid (word count %zu).", token_count, count); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (byte_code[2] != TAG_DXIL) + WARN("Unknown magic number 0x%08x.\n", byte_code[2]); + + dxil_version = byte_code[3]; + if (dxil_version > 0x102) + WARN("Unknown DXIL version: 0x%08x.\n", dxil_version); + else + TRACE("DXIL version: 0x%08x.\n", dxil_version); + + chunk_offset = byte_code[4]; + if (chunk_offset < 16 || chunk_offset >= byte_code_size) + { + WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", chunk_offset, byte_code_size); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, + "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); + return VKD3D_ERROR_INVALID_SHADER; + } + chunk_size = byte_code[5]; + if (chunk_size > byte_code_size - chunk_offset) + { + WARN("Invalid bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", + chunk_size, byte_code_size, chunk_offset); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, + "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", + chunk_size, byte_code_size, chunk_offset); + return VKD3D_ERROR_INVALID_SHADER; + } + + sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); + if ((magic = sm6->start[0]) != BITCODE_MAGIC) + { + WARN("Unknown magic number 0x%08x.\n", magic); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, + "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); + } + + sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; + + if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) + { + 
FIXME("Unknown shader type %#x.\n", version.type); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, + "Unknown shader type %#x.", version.type); + } + + version.major = VKD3D_SM6_VERSION_MAJOR(version_token); + version.minor = VKD3D_SM6_VERSION_MINOR(version_token); + + if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) + { + WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, + "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); + return VKD3D_ERROR_INVALID_SHADER; + } + + /* Estimate instruction count to avoid reallocation in most shaders. */ + count = max(token_count, 400) - 400; + vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, + (count + (count >> 2)) / 2u + 10); + sm6->ptr = &sm6->start[1]; + sm6->bitpos = 2; + + block = &sm6->root_block; + if ((ret = dxil_block_init(block, NULL, sm6)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL bitcode chunk."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, + "DXIL bitcode chunk has invalid bitcode."); + else + vkd3d_unreachable(); + return ret; + } + + dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); + sm6->abbrevs = NULL; + sm6->abbrev_count = 0; + + length = sm6->ptr - sm6->start - block->start; + if (length != block->length) + { + WARN("Invalid block length %zu; expected %u.\n", length, block->length); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, + "Root block ends with length %zu but indicated length is %u.", length, block->length); + } + if (sm6->ptr != sm6->end) + { + size_t expected_length = sm6->end - sm6->start; + length = sm6->ptr - sm6->start; + 
WARN("Invalid module length %zu; expected %zu.\n", length, expected_length); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, + "Module ends with length %zu but indicated length is %zu.", length, expected_length); + } + + if ((ret = sm6_parser_type_table_init(sm6)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL type table."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE, + "DXIL type table is invalid."); + else + vkd3d_unreachable(); + return ret; + } + + if ((ret = sm6_parser_symtab_init(sm6)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL value symbol table."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB, + "DXIL value symbol table is invalid."); + else + vkd3d_unreachable(); + return ret; + } + + function_count = dxil_block_compute_function_count(&sm6->root_block); + if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) + { + ERR("Failed to allocate function array.\n"); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating DXIL function array."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) + { + WARN("Value array count overflowed.\n"); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "Overflow occurred in the DXIL module value count."); + return VKD3D_ERROR_INVALID_SHADER; + } + if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) + { + ERR("Failed to allocate value 
array.\n"); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating DXIL value array."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = sm6_parser_globals_init(sm6)) < 0) + { + WARN("Failed to load global declarations.\n"); + return ret; + } + + if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL module."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "DXIL module is invalid."); + else + vkd3d_unreachable(); + return ret; + } + + for (i = 0; i < sm6->function_count; ++i) + { + if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6)) + { + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory emitting shader instructions."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + } + + dxil_block_destroy(&sm6->root_block); + + return VKD3D_OK; +} + +int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_desc *shader_desc; + uint32_t *byte_code = NULL; + struct sm6_parser *sm6; + int ret; + + if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + shader_desc = &sm6->p.shader_desc; + shader_desc->is_dxil = true; + if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, + shader_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm6); + return ret; + } + + sm6->p.shader_desc = *shader_desc; + shader_desc = &sm6->p.shader_desc; + + if (((uintptr_t)shader_desc->byte_code & 
(VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) + { + /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC + * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ + if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) + ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); + else + memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); + } + + ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, + compile_info->source_name, message_context); + vkd3d_free(byte_code); + + if (ret < 0) + { + WARN("Failed to initialise shader parser.\n"); + sm6_parser_destroy(&sm6->p); + return ret; + } + + *parser = &sm6->p; + + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index ba5bcfbfaf0..8b706e1e667 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -430,6 +430,51 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl return type; }
+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, + enum hlsl_regset regset, unsigned int index) +{ + struct hlsl_type *next_type; + unsigned int offset = 0; + unsigned int idx; + + while (!type_is_single_component(type)) + { + next_type = type; + idx = traverse_path_from_component_index(ctx, &next_type, &index); + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + if (regset == HLSL_REGSET_NUMERIC) + offset += idx; + break; + + case HLSL_CLASS_STRUCT: + offset += type->e.record.fields[idx].reg_offset[regset]; + break; + + case HLSL_CLASS_ARRAY: + if (regset == HLSL_REGSET_NUMERIC) + offset += idx * align(type->e.array.type->reg_size[regset], 4); + else + offset += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_OBJECT: + assert(idx == 0); + break; + + default: + vkd3d_unreachable(); + } + type = next_type; + } + + return offset; +} + static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len) { @@ -524,7 +569,9 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de unsigned int i;
assert(deref); - assert(!deref->offset.node); + + if (deref->offset.node) + return deref->data_type;
type = deref->var->data_type; for (i = 0; i < deref->path_len; ++i) @@ -626,6 +673,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba type->e.array.type = basic_type; type->dimx = basic_type->dimx; type->dimy = basic_type->dimy; + type->sampler_dim = basic_type->sampler_dim; hlsl_type_calculate_reg_size(ctx, type);
list_add_tail(&ctx->types, &type->entry); @@ -992,20 +1040,31 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem struct vkd3d_string_buffer *string; struct hlsl_ir_var *var; static LONG counter; - const char *name;
if (!(string = hlsl_get_string_buffer(ctx))) return NULL; vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); - if (!(name = hlsl_strdup(ctx, string->buffer))) - { - hlsl_release_string_buffer(ctx, string); - return NULL; - } - var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); + var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc, true); hlsl_release_string_buffer(ctx, string); + return var; +} + +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope) +{ + struct hlsl_ir_var *var; + const char *name_copy; + + if (!(name_copy = hlsl_strdup(ctx, name))) + return NULL; + var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); if (var) - list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); + { + if (dummy_scope) + list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); + else + list_add_tail(&ctx->globals->vars, &var->scope_entry); + } return var; }
@@ -1432,7 +1491,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v }
struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, - const struct vkd3d_shader_location *loc) + struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) { struct hlsl_ir_jump *jump;
@@ -1440,6 +1499,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return NULL; init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); jump->type = type; + hlsl_src_from_node(&jump->condition, condition); return &jump->node; }
@@ -1484,7 +1544,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, hlsl_block_cleanup(dst_block); return false; } - list_add_tail(&dst_block->instrs, &dst->entry); + hlsl_block_add_instr(dst_block, dst);
if (!list_empty(&src->uses)) { @@ -1585,9 +1645,9 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma return dst; }
-static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) +static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) { - return hlsl_new_jump(ctx, src->type, &src->node.loc); + return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); }
static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) @@ -1728,7 +1788,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, return clone_index(ctx, map, hlsl_ir_index(instr));
case HLSL_IR_JUMP: - return clone_jump(ctx, hlsl_ir_jump(instr)); + return clone_jump(ctx, map, hlsl_ir_jump(instr));
case HLSL_IR_LOAD: return clone_load(ctx, map, hlsl_ir_load(instr)); @@ -2065,6 +2125,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } }
+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, + unsigned int index) +{ + struct hlsl_type *type = var->data_type, *current_type; + struct vkd3d_string_buffer *buffer; + unsigned int element_index; + + if (!(buffer = hlsl_get_string_buffer(ctx))) + return NULL; + + vkd3d_string_buffer_printf(buffer, "%s", var->name); + + while (!type_is_single_component(type)) + { + current_type = type; + element_index = traverse_path_from_component_index(ctx, &type, &index); + if (current_type->class == HLSL_CLASS_STRUCT) + vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); + else + vkd3d_string_buffer_printf(buffer, "[%u]", element_index); + } + + return buffer; +} + const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) { struct vkd3d_string_buffer *string; @@ -2123,18 +2208,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) { static const char * const names[] = { - "HLSL_IR_CALL", - "HLSL_IR_CONSTANT", - "HLSL_IR_EXPR", - "HLSL_IR_IF", - "HLSL_IR_INDEX", - "HLSL_IR_LOAD", - "HLSL_IR_LOOP", - "HLSL_IR_JUMP", - "HLSL_IR_RESOURCE_LOAD", - "HLSL_IR_RESOURCE_STORE", - "HLSL_IR_STORE", - "HLSL_IR_SWIZZLE", + [HLSL_IR_CALL ] = "HLSL_IR_CALL", + [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", + [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", + [HLSL_IR_IF ] = "HLSL_IR_IF", + [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", + [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", + [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", + [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", + [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", + [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", + [HLSL_IR_STORE ] = "HLSL_IR_STORE", + [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", };
if (type >= ARRAY_SIZE(names)) @@ -2146,10 +2231,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) { static const char * const names[] = { - "HLSL_IR_JUMP_BREAK", - "HLSL_IR_JUMP_CONTINUE", - "HLSL_IR_JUMP_DISCARD", - "HLSL_IR_JUMP_RETURN", + [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", + [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", + [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", + [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", + [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", };
assert(type < ARRAY_SIZE(names)); @@ -2158,11 +2244,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type)
static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr);
-static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) +static void dump_block(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_block *block) { struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { dump_instr(ctx, buffer, instr); vkd3d_string_buffer_printf(buffer, "\n"); @@ -2337,7 +2423,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_COS] = "cos", [HLSL_OP1_COS_REDUCED] = "cos_reduced", [HLSL_OP1_DSX] = "dsx", + [HLSL_OP1_DSX_COARSE] = "dsx_coarse", + [HLSL_OP1_DSX_FINE] = "dsx_fine", [HLSL_OP1_DSY] = "dsy", + [HLSL_OP1_DSY_COARSE] = "dsy_coarse", + [HLSL_OP1_DSY_FINE] = "dsy_fine", [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", @@ -2400,9 +2490,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "if ("); dump_src(buffer, &if_node->condition); vkd3d_string_buffer_printf(buffer, ") {\n"); - dump_instr_list(ctx, buffer, &if_node->then_block.instrs); + dump_block(ctx, buffer, &if_node->then_block); vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); - dump_instr_list(ctx, buffer, &if_node->else_block.instrs); + dump_block(ctx, buffer, &if_node->else_block); vkd3d_string_buffer_printf(buffer, " %10s }", ""); }
@@ -2418,8 +2508,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i vkd3d_string_buffer_printf(buffer, "continue"); break;
- case HLSL_IR_JUMP_DISCARD: - vkd3d_string_buffer_printf(buffer, "discard"); + case HLSL_IR_JUMP_DISCARD_NEG: + vkd3d_string_buffer_printf(buffer, "discard_neg"); + break; + + case HLSL_IR_JUMP_DISCARD_NZ: + vkd3d_string_buffer_printf(buffer, "discard_nz"); break;
case HLSL_IR_JUMP_RETURN: @@ -2431,7 +2525,7 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) { vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); - dump_instr_list(ctx, buffer, &loop->body.instrs); + dump_block(ctx, buffer, &loop->body); vkd3d_string_buffer_printf(buffer, " %10s }", ""); }
@@ -2450,6 +2544,8 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", [HLSL_RESOURCE_GATHER_ALPHA] = "gather_alpha", + [HLSL_RESOURCE_SAMPLE_INFO] = "sample_info", + [HLSL_RESOURCE_RESINFO] = "resinfo", };
assert(load->load_type < ARRAY_SIZE(type_names)); @@ -2457,8 +2553,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru dump_deref(buffer, &load->resource); vkd3d_string_buffer_printf(buffer, ", sampler = "); dump_deref(buffer, &load->sampler); - vkd3d_string_buffer_printf(buffer, ", coords = "); - dump_src(buffer, &load->coords); + if (load->coords.node) + { + vkd3d_string_buffer_printf(buffer, ", coords = "); + dump_src(buffer, &load->coords); + } if (load->sample_index.node) { vkd3d_string_buffer_printf(buffer, ", sample index = "); @@ -2614,7 +2713,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl vkd3d_string_buffer_printf(&buffer, "\n"); } if (func->has_body) - dump_instr_list(ctx, &buffer, &func->body.instrs); + dump_block(ctx, &buffer, &func->body);
vkd3d_string_buffer_trace(&buffer); vkd3d_string_buffer_cleanup(&buffer); @@ -2703,6 +2802,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node)
static void free_ir_jump(struct hlsl_ir_jump *jump) { + hlsl_src_remove(&jump->condition); vkd3d_free(jump); }
@@ -2822,7 +2922,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr)
for (i = 0; i < attr->args_count; ++i) hlsl_src_remove(&attr->args[i]); - hlsl_free_instr_list(&attr->instrs); + hlsl_block_cleanup(&attr->instrs); vkd3d_free((void *)attr->name); vkd3d_free(attr); } @@ -3127,8 +3227,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx)
for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) { - unsigned int n_variants = 0; const char *const *variants; + unsigned int n_variants;
switch (bt) { @@ -3148,6 +3248,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) break;
default: + n_variants = 0; + variants = NULL; break; }
@@ -3199,9 +3301,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) } }
-static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, +static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) { + unsigned int i; + memset(ctx, 0, sizeof(*ctx));
ctx->profile = profile; @@ -3210,7 +3314,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name,
if (!(ctx->source_files = hlsl_alloc(ctx, sizeof(*ctx->source_files)))) return false; - if (!(ctx->source_files[0] = hlsl_strdup(ctx, source_name ? source_name : "<anonymous>"))) + if (!(ctx->source_files[0] = hlsl_strdup(ctx, compile_info->source_name ? compile_info->source_name : "<anonymous>"))) { vkd3d_free(ctx->source_files); return false; @@ -3249,6 +3353,19 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, return false; ctx->cur_buffer = ctx->globals_buffer;
+ for (i = 0; i < compile_info->option_count; ++i) + { + const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; + + if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) + { + if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) + ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; + else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) + ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; + } + } + return true; }
@@ -3260,6 +3377,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) struct hlsl_type *type, *next_type; unsigned int i;
+ hlsl_block_cleanup(&ctx->static_initializers); + for (i = 0; i < ctx->source_files_count; ++i) vkd3d_free((void *)ctx->source_files[i]); vkd3d_free(ctx->source_files); @@ -3283,6 +3402,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) vkd3d_free((void *)buffer->name); vkd3d_free(buffer); } + + vkd3d_free(ctx->constant_defs.regs); }
int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, @@ -3324,7 +3445,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d return VKD3D_ERROR_INVALID_ARGUMENT; }
- if (!hlsl_ctx_init(&ctx, compile_info->source_name, profile, message_context)) + if (!hlsl_ctx_init(&ctx, compile_info, profile, message_context)) return VKD3D_ERROR_OUT_OF_MEMORY;
if ((ret = hlsl_lexer_compile(&ctx, hlsl)) == 2) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index cd1ad37a542..8c21bd10801 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -257,7 +257,7 @@ struct hlsl_reg /* Number of registers to be allocated. * Unlike the variable's type's regsize, it is not expressed in register components, but rather * in whole registers, and may depend on which components are used within the shader. */ - uint32_t bind_count; + uint32_t allocation_size; /* For numeric registers, a writemask can be provided to indicate the reservation of only some * of the 4 components. */ unsigned int writemask; @@ -337,7 +337,7 @@ struct hlsl_src struct hlsl_attribute { const char *name; - struct list instrs; + struct hlsl_block instrs; struct vkd3d_shader_location loc; unsigned int args_count; struct hlsl_src args[]; @@ -356,6 +356,7 @@ struct hlsl_attribute #define HLSL_MODIFIER_COLUMN_MAJOR 0x00000400 #define HLSL_STORAGE_IN 0x00000800 #define HLSL_STORAGE_OUT 0x00001000 +#define HLSL_MODIFIER_INLINE 0x00002000
#define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ @@ -417,11 +418,15 @@ struct hlsl_ir_var enum hlsl_sampler_dim sampler_dim; struct vkd3d_shader_location first_sampler_dim_loc; } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; + /* Minimum number of binds required to include all object components actually used in the shader. + * It may be less than the allocation size, e.g. for texture arrays. */ + unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1];
uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; uint32_t is_param : 1; + uint32_t is_separated_resource : 1; };
/* Sized array of variables representing a function's parameters. */ @@ -502,7 +507,11 @@ enum hlsl_ir_expr_op HLSL_OP1_COS, HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_DSX, + HLSL_OP1_DSX_COARSE, + HLSL_OP1_DSX_FINE, HLSL_OP1_DSY, + HLSL_OP1_DSY_COARSE, + HLSL_OP1_DSY_FINE, HLSL_OP1_EXP2, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, @@ -558,7 +567,8 @@ enum hlsl_ir_jump_type { HLSL_IR_JUMP_BREAK, HLSL_IR_JUMP_CONTINUE, - HLSL_IR_JUMP_DISCARD, + HLSL_IR_JUMP_DISCARD_NEG, + HLSL_IR_JUMP_DISCARD_NZ, HLSL_IR_JUMP_RETURN, };
@@ -566,6 +576,8 @@ struct hlsl_ir_jump { struct hlsl_ir_node node; enum hlsl_ir_jump_type type; + /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ + struct hlsl_src condition; };
struct hlsl_ir_swizzle @@ -600,9 +612,11 @@ struct hlsl_deref * components, within the pertaining regset), from the start of the variable, of the part * referenced. * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- - * before writing the bytecode. */ + * before writing the bytecode. + * Since the type information can no longer be retrieved from the offset alone, the type is + * stored in the data_type field. */ struct hlsl_src offset; - enum hlsl_regset offset_regset; + struct hlsl_type *data_type; };
struct hlsl_ir_load @@ -624,6 +638,8 @@ enum hlsl_resource_load_type HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, HLSL_RESOURCE_GATHER_ALPHA, + HLSL_RESOURCE_SAMPLE_INFO, + HLSL_RESOURCE_RESINFO, };
struct hlsl_ir_resource_load @@ -803,7 +819,11 @@ struct hlsl_ctx * Only used for SM1 profiles. */ struct hlsl_constant_defs { - struct hlsl_vec4 *values; + struct hlsl_constant_register + { + uint32_t index; + struct hlsl_vec4 value; + } *regs; size_t count, size; } constant_defs; /* Number of temp. registers required for the shader to run, i.e. the largest temp register @@ -1055,10 +1075,12 @@ const char *debug_hlsl_writemask(unsigned int writemask); const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count);
struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); +struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, + unsigned int index); struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
-struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); @@ -1120,7 +1142,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, - enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); + enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
@@ -1132,6 +1154,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, @@ -1156,6 +1180,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned in struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); @@ -1187,6 +1213,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, unsigned int index); +unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, + enum hlsl_regset regset, unsigned int index); bool hlsl_type_is_row_major(const struct hlsl_type *type); unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); @@ -1227,7 +1255,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type 
*swizzle_type, bool *has_idx); + bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out);
int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 0e07fe578e1..43ea4b4d038 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -53,7 +53,7 @@ struct parse_initializer { struct hlsl_ir_node **args; unsigned int args_count; - struct list *instrs; + struct hlsl_block *instrs; bool braces; };
@@ -73,6 +73,10 @@ struct parse_variable_def struct hlsl_semantic semantic; struct hlsl_reg_reservation reg_reservation; struct parse_initializer initializer; + + struct hlsl_type *basic_type; + unsigned int modifiers; + struct vkd3d_shader_location modifiers_loc; };
struct parse_function @@ -85,8 +89,8 @@ struct parse_function
struct parse_if_body { - struct list *then_block; - struct list *else_block; + struct hlsl_block *then_block; + struct hlsl_block *else_block; };
enum parse_assign_op @@ -129,9 +133,18 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "%s", s); }
-static struct hlsl_ir_node *node_from_list(struct list *list) +static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) +{ + return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); +} + +static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) { - return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); + struct hlsl_block *block; + + if ((block = hlsl_alloc(ctx, sizeof(*block)))) + hlsl_block_init(block); + return block; }
static struct list *make_empty_list(struct hlsl_ctx *ctx) @@ -143,10 +156,10 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) return list; }
-static void destroy_instr_list(struct list *list) +static void destroy_block(struct hlsl_block *block) { - hlsl_free_instr_list(list); - vkd3d_free(list); + hlsl_block_cleanup(block); + vkd3d_free(block); }
static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, @@ -273,10 +286,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ return hlsl_types_are_componentwise_equal(ctx, src, dst); }
-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc); - -static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; @@ -313,7 +323,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, { struct hlsl_ir_node *component_load; struct hlsl_type *dst_comp_type; - struct hlsl_block block; + struct hlsl_block store_block; unsigned int src_idx;
if (broadcast) @@ -333,21 +343,21 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs,
dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);
- if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) + if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) return NULL;
if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) return NULL; - list_add_tail(instrs, &cast->entry); + hlsl_block_add_instr(block, cast);
- if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) + if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) return NULL; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(block, &store_block); }
if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node);
return &load->node; } @@ -355,12 +365,12 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, { if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) return NULL; - list_add_tail(instrs, &cast->entry); + hlsl_block_add_instr(block, cast); return cast; } }
-static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; @@ -386,7 +396,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix");
- return add_cast(ctx, instrs, node, dst_type, loc); + return add_cast(ctx, block, node, dst_type, loc); }
static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, @@ -405,29 +415,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, return modifiers | mod; }
-static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) +static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) { struct hlsl_ir_node *condition, *not, *iff, *jump; struct hlsl_block then_block;
/* E.g. "for (i = 0; ; ++i)". */ - if (list_empty(cond_list)) + if (list_empty(&cond_block->instrs)) return true;
- condition = node_from_list(cond_list); + condition = node_from_block(cond_block); if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) return false; - list_add_tail(cond_list, &not->entry); + hlsl_block_add_instr(cond_block, not);
hlsl_block_init(&then_block);
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) return false; hlsl_block_add_instr(&then_block, jump);
if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) return false; - list_add_tail(cond_list, &iff->entry); + hlsl_block_add_instr(cond_block, iff); return true; }
@@ -454,10 +464,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att return false; }
-static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, - struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) +static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, + struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { - struct hlsl_block body_block; struct hlsl_ir_node *loop; unsigned int i;
@@ -476,53 +486,49 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const } else { - hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); } } else if (!strcmp(attr->name, "loop") || !strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) { - hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); + hlsl_fixme(ctx, loc, "Unhandled attribute '%s'.", attr->name); } else { - hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); } }
- if (!init && !(init = make_empty_list(ctx))) + if (!init && !(init = make_empty_block(ctx))) goto oom;
if (!append_conditional_break(ctx, cond)) goto oom;
- hlsl_block_init(&body_block); - - if (type != LOOP_DO_WHILE) - list_move_tail(&body_block.instrs, cond); - - list_move_tail(&body_block.instrs, body); - if (iter) - list_move_tail(&body_block.instrs, iter); + hlsl_block_add_block(body, iter);
if (type == LOOP_DO_WHILE) - list_move_tail(&body_block.instrs, cond); + list_move_tail(&body->instrs, &cond->instrs); + else + list_move_head(&body->instrs, &cond->instrs);
- if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) + if (!(loop = hlsl_new_loop(ctx, body, loc))) goto oom; - list_add_tail(init, &loop->entry); + hlsl_block_add_instr(init, loop);
- vkd3d_free(cond); - vkd3d_free(body); + destroy_block(cond); + destroy_block(body); + destroy_block(iter); return init;
oom: - destroy_instr_list(init); - destroy_instr_list(cond); - destroy_instr_list(iter); - destroy_instr_list(body); + destroy_block(init); + destroy_block(cond); + destroy_block(iter); + destroy_block(body); return NULL; }
@@ -539,7 +545,7 @@ static unsigned int initializer_size(const struct parse_initializer *initializer
static void free_parse_initializer(struct parse_initializer *initializer) { - destroy_instr_list(initializer->instrs); + destroy_block(initializer->instrs); vkd3d_free(initializer->args); }
@@ -625,7 +631,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod return NULL; }
-static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, +static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; @@ -637,7 +643,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, { struct hlsl_ir_node *store;
- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) + if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) return false;
if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) @@ -656,18 +662,18 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); }
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) return false; - list_add_tail(instrs, &jump->entry); + hlsl_block_add_instr(block, jump);
return true; }
-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *load, *store; - struct hlsl_block block; + struct hlsl_block load_block; struct hlsl_ir_var *var; struct hlsl_deref src;
@@ -676,17 +682,17 @@ static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list
if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) return NULL; - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store);
hlsl_init_simple_deref_from_var(&src, var); - if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) + if (!(load = hlsl_new_load_component(ctx, &load_block, &src, comp, loc))) return NULL; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(block, &load_block);
return load; }
-static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, +static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *record, unsigned int idx, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *index, *c; @@ -695,20 +701,20 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct
if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) return false; - list_add_tail(instrs, &c->entry); + hlsl_block_add_instr(block, c);
if (!(index = hlsl_new_index(ctx, record, c, loc))) return false; - list_add_tail(instrs, &index->entry); + hlsl_block_add_instr(block, index);
return true; }
-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc);
-static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, +static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *array, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; @@ -731,13 +737,13 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h return false; }
- if (!(index = add_implicit_conversion(ctx, instrs, index, + if (!(index = add_implicit_conversion(ctx, block, index, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) return false;
if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; - list_add_tail(instrs, &return_index->entry); + hlsl_block_add_instr(block, return_index);
return true; } @@ -750,7 +756,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h
if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) return false; - list_add_tail(instrs, &cast->entry); + hlsl_block_add_instr(block, cast); index = cast;
if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) @@ -764,7 +770,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h
if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; - list_add_tail(instrs, &return_index->entry); + hlsl_block_add_instr(block, return_index);
return true; } @@ -830,6 +836,16 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; }
+static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); +} + +static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return !shader_profile_version_ge(ctx, major, minor); +} + static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct hlsl_type *type, unsigned int modifiers, struct list *defs) { @@ -1020,7 +1036,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const struct hlsl_reg_reservation reservation = {0}; char *endptr;
- if (ctx->profile->major_version < 4) + if (shader_profile_version_lt(ctx, 4, 0)) return reservation;
reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); @@ -1079,17 +1095,17 @@ static struct hlsl_ir_function_decl *get_func_decl(struct rb_tree *funcs, return NULL; }
-static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) +static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr) { - struct list *list; + struct hlsl_block *block;
- if (!(list = make_empty_list(ctx))) + if (!(block = make_empty_block(ctx))) { - hlsl_free_instr(node); + hlsl_free_instr(instr); return NULL; } - list_add_tail(list, &node->entry); - return list; + hlsl_block_add_instr(block, instr); + return block; }
static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -1097,20 +1113,50 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str { struct hlsl_ir_constant *constant; struct hlsl_ir_node *node; + struct hlsl_block expr; unsigned int ret = 0; bool progress;
- if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + switch (node->type) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_SWIZZLE: + case HLSL_IR_LOAD: + case HLSL_IR_INDEX: + continue; + case HLSL_IR_CALL: + case HLSL_IR_IF: + case HLSL_IR_LOOP: + case HLSL_IR_JUMP: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_STORE: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + } + } + + if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) + return 0; + hlsl_block_add_block(&expr, block); + + if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) + { + hlsl_block_cleanup(&expr); return 0; + }
do { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); - progress |= hlsl_copy_propagation_execute(ctx, block); + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); + progress |= hlsl_copy_propagation_execute(ctx, &expr); } while (progress);
- node = node_from_list(&block->instrs); + node = node_from_block(&expr); if (node->type == HLSL_IR_CONSTANT) { constant = hlsl_ir_constant(node); @@ -1119,9 +1165,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str else { hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Failed to evaluate constant expression %d.", node->type); + "Failed to evaluate constant expression."); }
+ hlsl_block_cleanup(&expr); + return ret; }
@@ -1253,7 +1301,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return true; }
-static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *type, const struct vkd3d_shader_location *loc) { @@ -1277,38 +1325,38 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, for (i = 0; i < type->dimy * type->dimx; ++i) { struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; - struct hlsl_block block; + struct hlsl_block store_block; unsigned int j;
for (j = 0; j < HLSL_MAX_OPERANDS; j++) { if (operands[j]) { - if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) + if (!(load = hlsl_add_load_component(ctx, block, operands[j], i, loc))) return NULL;
cell_operands[j] = load; } }
- if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) + if (!(value = add_expr(ctx, block, op, cell_operands, scalar_type, loc))) return NULL;
- if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) + if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, value)) return NULL; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(block, &store_block); }
if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &var_load->node.entry); + hlsl_block_add_instr(block, &var_load->node);
return &var_load->node; }
if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) return NULL; - list_add_tail(instrs, &expr->entry); + hlsl_block_add_instr(block, expr);
return expr; } @@ -1334,23 +1382,23 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * } }
-static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg};
- return add_expr(ctx, instrs, op, args, arg->data_type, loc); + return add_expr(ctx, block, op, args, arg->data_type, loc); }
-static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { check_integer_type(ctx, arg);
- return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); + return add_unary_arithmetic_expr(ctx, block, op, arg, loc); }
-static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; @@ -1359,10 +1407,10 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, bool_type, loc); + return add_expr(ctx, block, op, args, bool_type, loc); }
static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, @@ -1378,7 +1426,7 @@ static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const str return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); }
-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1387,49 +1435,26 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str
common_type = get_common_numeric_type(ctx, arg1, arg2, loc);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, common_type, loc); + return add_expr(ctx, block, op, args, common_type, loc); }
-static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); - return list1; -} - -static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { check_integer_type(ctx, arg1); check_integer_type(ctx, arg2);
- return add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); + return add_binary_arithmetic_expr(ctx, block, op, arg1, arg2, loc); }
-static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; -} - -static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1445,27 +1470,16 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, return_type, loc); -} - -static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); - return list1; + return add_expr(ctx, block, op, args, return_type, loc); }
-static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1479,28 +1493,16 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct
common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, common_type, loc); + return add_expr(ctx, block, op, args, common_type, loc); }
-static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; -} - -static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1522,28 +1524,16 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, return_type, loc); + return add_expr(ctx, block, op, args, return_type, loc); }
-static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; -} - -static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); @@ -1557,8 +1547,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, arg1->data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid type %s.\n", string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); hlsl_release_string_buffer(ctx, string); return NULL; } @@ -1568,8 +1557,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, arg2->data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid type %s.\n", string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); hlsl_release_string_buffer(ctx, string); return NULL; } @@ -1598,6 +1586,53 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return add_expr(ctx, instrs, op, args, ret_type, loc); }
+static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, + struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); + + hlsl_block_add_block(block1, block2); + destroy_block(block2); + + switch (op) + { + case HLSL_OP2_ADD: + case HLSL_OP2_DIV: + case HLSL_OP2_MOD: + case HLSL_OP2_MUL: + add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_BIT_AND: + case HLSL_OP2_BIT_OR: + case HLSL_OP2_BIT_XOR: + add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LESS: + case HLSL_OP2_GEQUAL: + case HLSL_OP2_EQUAL: + case HLSL_OP2_NEQUAL: + add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: + add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LSHIFT: + case HLSL_OP2_RSHIFT: + add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); + break; + + default: + vkd3d_unreachable(); + } + + return block1; +} + static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) { static const enum hlsl_ir_expr_op ops[] = @@ -1654,7 +1689,7 @@ static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsig return true; }
-static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *lhs, +static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; @@ -1663,7 +1698,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in
if (assign_op == ASSIGN_OP_SUB) { - if (!(rhs = add_unary_arithmetic_expr(ctx, instrs, HLSL_OP1_NEG, rhs, &rhs->loc))) + if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) return NULL; assign_op = ASSIGN_OP_ADD; } @@ -1672,14 +1707,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum hlsl_ir_expr_op op = op_from_assignment(assign_op);
assert(op); - if (!(rhs = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) + if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) return NULL; }
if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) writemask = (1 << lhs_type->dimx) - 1;
- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) + if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) return NULL;
while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) @@ -1708,7 +1743,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { return NULL; } - list_add_tail(instrs, &new_swizzle->entry); + hlsl_block_add_instr(block, new_swizzle);
lhs = swizzle->val.node; rhs = new_swizzle; @@ -1754,7 +1789,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_cleanup_deref(&resource_deref); return NULL; } - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&resource_deref); } else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) @@ -1773,13 +1808,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in
if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) return NULL; - list_add_tail(instrs, &c->entry); + hlsl_block_add_instr(block, c);
if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) return NULL; - list_add_tail(instrs, &cell->entry); + hlsl_block_add_instr(block, cell);
- if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) + if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) return NULL;
if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) @@ -1790,7 +1825,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_cleanup_deref(&deref); return NULL; } - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&deref); } } @@ -1807,7 +1842,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_cleanup_deref(&deref); return NULL; } - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&deref); }
@@ -1816,14 +1851,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in * the last instruction in the list, we do need to copy. */ if (!(copy = hlsl_new_copy(ctx, rhs))) return NULL; - list_add_tail(instrs, &copy->entry); + hlsl_block_add_instr(block, copy); return copy; }
-static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, +static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *lhs = node_from_list(instrs); + struct hlsl_ir_node *lhs = node_from_block(block); struct hlsl_ir_node *one;
if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) @@ -1832,9 +1867,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem
if (!(one = hlsl_new_int_constant(ctx, 1, loc))) return false; - list_add_tail(instrs, &one->entry); + hlsl_block_add_instr(block, one);
- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) + if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) return false;
if (post) @@ -1843,7 +1878,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem
if (!(copy = hlsl_new_copy(ctx, lhs))) return false; - list_add_tail(instrs, &copy->entry); + hlsl_block_add_instr(block, copy);
/* Post increment/decrement expressions are considered const. */ if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) @@ -1853,7 +1888,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem return true; }
-static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, +static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) { unsigned int src_comp_count = hlsl_type_component_count(src->data_type); @@ -1868,7 +1903,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_type *dst_comp_type; struct hlsl_block block;
- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) + if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) return;
dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); @@ -1878,7 +1913,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) return; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(instrs, &block);
++*store_index; } @@ -1924,211 +1959,231 @@ static bool type_has_numeric_components(struct hlsl_type *type) return false; }
-static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, - unsigned int modifiers, const struct vkd3d_shader_location *modifiers_loc, struct list *var_list) +static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, + const struct vkd3d_shader_location *loc) { - struct parse_variable_def *v, *v_next; + modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); + if (modifiers) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, modifiers))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } +} + +static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) +{ + struct hlsl_type *basic_type = v->basic_type; struct hlsl_ir_function_decl *func; - unsigned int invalid_modifiers; - struct list *statements_list; + struct hlsl_semantic new_semantic; + uint32_t modifiers = v->modifiers; + bool unbounded_res_array = false; struct hlsl_ir_var *var; struct hlsl_type *type; bool local = true; + char *var_name; + unsigned int i; + + assert(basic_type);
if (basic_type->class == HLSL_CLASS_MATRIX) assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
- if (!(statements_list = make_empty_list(ctx))) - { - LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) - free_parse_variable_def(v); - vkd3d_free(var_list); - return NULL; - } - - if (!var_list) - return statements_list; + type = basic_type;
- invalid_modifiers = modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); - if (invalid_modifiers) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_modifiers_to_string(ctx, invalid_modifiers))) - hlsl_error(ctx, modifiers_loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); - hlsl_release_string_buffer(ctx, string); + for (i = 0; i < v->arrays.count; ++i) + unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); }
- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + if (unbounded_res_array) { - bool unbounded_res_array = false; - unsigned int i; - - type = basic_type; - - if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + if (v->arrays.count == 1) { - for (i = 0; i < v->arrays.count; ++i) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); + hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); + return; } - - if (unbounded_res_array) + else { - if (v->arrays.count == 1) - { - hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); - free_parse_variable_def(v); - continue; - } - else - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Unbounded resource arrays cannot be multi-dimensional."); - } + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Unbounded resource arrays cannot be multi-dimensional."); } - else + } + else + { + for (i = 0; i < v->arrays.count; ++i) { - for (i = 0; i < v->arrays.count; ++i) + if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) { - if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - { - unsigned int size = initializer_size(&v->initializer); - unsigned int elem_components = hlsl_type_component_count(type); - - if (i < v->arrays.count - 1) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Only innermost array size can be implicit."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else if (elem_components == 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Cannot declare an implicit size array of a size 0 type."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else if (size == 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Implicit size arrays need to be initialized."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; + unsigned int size = 
initializer_size(&v->initializer); + unsigned int elem_components = hlsl_type_component_count(type);
- } - else if (size % elem_components != 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Cannot initialize implicit size array with %u components, expected a multiple of %u.", - size, elem_components); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else - { - v->arrays.sizes[i] = size / elem_components; - } + if (i < v->arrays.count - 1) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Only innermost array size can be implicit."); + v->initializer.args_count = 0; + } + else if (elem_components == 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Cannot declare an implicit size array of a size 0 type."); + v->initializer.args_count = 0; + } + else if (size == 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Implicit size arrays need to be initialized."); + v->initializer.args_count = 0; + } + else if (size % elem_components != 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Cannot initialize implicit size array with %u components, expected a multiple of %u.", + size, elem_components); + v->initializer.args_count = 0; + } + else + { + v->arrays.sizes[i] = size / elem_components; } - type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); } + type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); } - vkd3d_free(v->arrays.sizes); + } + + if (!(var_name = vkd3d_strdup(v->name))) + return;
- if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) + new_semantic = v->semantic; + if (v->semantic.name) + { + if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) { - free_parse_variable_def(v); - continue; + vkd3d_free(var_name); + return; } + }
- var->buffer = ctx->cur_buffer; + if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) + { + hlsl_cleanup_semantic(&new_semantic); + vkd3d_free(var_name); + return; + }
- if (var->buffer == ctx->globals_buffer) - { - if (var->reg_reservation.offset_type) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() is only allowed inside constant buffer declarations."); - } + var->buffer = ctx->cur_buffer;
- if (ctx->cur_scope == ctx->globals) - { - local = false; + if (var->buffer == ctx->globals_buffer) + { + if (var->reg_reservation.offset_type) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is only allowed inside constant buffer declarations."); + }
- if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Variable '%s' is declared as both "uniform" and "static".", var->name); + if (ctx->cur_scope == ctx->globals) + { + local = false;
- /* Mark it as uniform. We need to do this here since synthetic - * variables also get put in the global scope, but shouldn't be - * considered uniforms, and we have no way of telling otherwise. */ - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Variable '%s' is declared as both "uniform" and "static".", var->name);
- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && - type_has_object_components(var->data_type, true)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support objects as struct members in uniform variables.\n"); - } + /* Mark it as uniform. We need to do this here since synthetic + * variables also get put in the global scope, but shouldn't be + * considered uniforms, and we have no way of telling otherwise. */ + if (!(modifiers & HLSL_STORAGE_STATIC)) + var->storage_modifiers |= HLSL_STORAGE_UNIFORM;
- if ((func = hlsl_get_func_decl(ctx, var->name))) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "'%s' is already defined as a function.", var->name); - hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, - "'%s' was previously defined here.", var->name); - } - } - else + if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && + type_has_object_components(var->data_type, true)) { - static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED - | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; - - if (modifiers & invalid) - { - struct vkd3d_string_buffer *string; + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support objects as struct members in uniform variables."); + }
- if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers '%s' are not allowed on local variables.", string->buffer); - hlsl_release_string_buffer(ctx, string); - } - if (var->semantic.name) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Semantics are not allowed on local variables."); + if ((func = hlsl_get_func_decl(ctx, var->name))) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "'%s' is already defined as a function.", var->name); + hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, + "'%s' was previously defined here.", var->name); } + } + else + { + static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED + | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM;
- if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) - && type_has_object_components(var->data_type, false)) + if (modifiers & invalid) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Static variables cannot have both numeric and resource components."); + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers '%s' are not allowed on local variables.", string->buffer); + hlsl_release_string_buffer(ctx, string); } + if (var->semantic.name) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Semantics are not allowed on local variables.");
- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count - && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) + if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count && !(modifiers & HLSL_STORAGE_STATIC)) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, - "Const variable \"%s\" is missing an initializer.", var->name); - hlsl_free_var(var); - free_parse_initializer(&v->initializer); - vkd3d_free(v); - continue; + "Const variable \"%s\" is missing an initializer.", var->name); } + } + + if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) + && type_has_object_components(var->data_type, false)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Static variables cannot have both numeric and resource components."); + }
- if (!hlsl_add_var(ctx, var, local)) + if (!hlsl_add_var(ctx, var, local)) + { + struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); + + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Variable \"%s\" was already declared in this scope.", var->name); + hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); + hlsl_free_var(var); + return; + } +} + +static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) +{ + struct parse_variable_def *v, *v_next; + struct hlsl_block *initializers; + struct hlsl_ir_var *var; + struct hlsl_type *type; + + if (!(initializers = make_empty_block(ctx))) + { + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); + free_parse_variable_def(v); + } + vkd3d_free(var_list); + return NULL; + }
- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Variable \"%s\" was already declared in this scope.", var->name); - hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); - hlsl_free_var(var); - free_parse_initializer(&v->initializer); - vkd3d_free(v); + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + { + /* If this fails, the variable failed to be declared. */ + if (!(var = hlsl_get_var(ctx->cur_scope, v->name))) + { + free_parse_variable_def(v); continue; } + type = var->data_type;
if (v->initializer.args_count) { @@ -2143,8 +2198,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, "Expected %u components in initializer, but got %u.", hlsl_type_component_count(type), size); - free_parse_initializer(&v->initializer); - vkd3d_free(v); + free_parse_variable_def(v); continue; }
@@ -2159,16 +2213,14 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc);
assert(v->initializer.args_count == 1); - list_add_tail(v->initializer.instrs, &load->node.entry); + hlsl_block_add_instr(v->initializer.instrs, &load->node); add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); }
- if (modifiers & HLSL_STORAGE_STATIC) - list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); + if (var->storage_modifiers & HLSL_STORAGE_STATIC) + hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); else - list_move_tail(statements_list, v->initializer.instrs); - vkd3d_free(v->initializer.args); - vkd3d_free(v->initializer.instrs); + hlsl_block_add_block(initializers, v->initializer.instrs); } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -2178,34 +2230,35 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t
if (type_has_object_components(var->data_type, false)) { - vkd3d_free(v); + free_parse_variable_def(v); continue; }
if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; } hlsl_block_add_instr(&ctx->static_initializers, zero);
- if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) + if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; }
if (!(store = hlsl_new_simple_store(ctx, var, cast))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; } hlsl_block_add_instr(&ctx->static_initializers, store); } - vkd3d_free(v); + free_parse_variable_def(v); } + vkd3d_free(var_list); - return statements_list; + return initializers; }
struct find_function_call_args @@ -2394,18 +2447,18 @@ static bool intrinsic_all(struct hlsl_ctx *ctx,
if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) return false; - list_add_tail(params->instrs, &one->entry); + hlsl_block_add_instr(params->instrs, one);
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
mul = one;
count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) return false;
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) @@ -2431,7 +2484,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, { if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) return false; @@ -2442,14 +2495,14 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, { if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) return false; - list_add_tail(params->instrs, &bfalse->entry); + hlsl_block_add_instr(params->instrs, bfalse);
or = bfalse;
count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) return false;
if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) @@ -2544,6 +2597,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); }
+static bool intrinsic_clip(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *condition, *jump; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + + condition = params->args[0]; + + if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) /* profiles below 4 reject arguments with more than 4 components */ + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, condition->data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Argument type cannot exceed 4 components, got type \"%s\".", string->buffer); /* inner quotes must be escaped or the literal ends early */ + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) + return false; + hlsl_block_add_instr(params->instrs, jump); + + return true; +} + static bool intrinsic_cos(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2579,26 +2660,26 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl1->entry); + hlsl_block_add_instr(params->instrs, arg1_swzl1);
if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl1->entry); + hlsl_block_add_instr(params->instrs, arg2_swzl1);
if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) return false;
if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) return false; - list_add_tail(params->instrs, &mul1_neg->entry); + hlsl_block_add_instr(params->instrs, mul1_neg);
if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl2->entry); + hlsl_block_add_instr(params->instrs, arg1_swzl2);
if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl2->entry); + hlsl_block_add_instr(params->instrs, arg2_swzl2);
if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) return false; @@ -2617,6 +2698,28 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); }
+static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); +} + +static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); +} + static bool intrinsic_ddy(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2628,6 +2731,28 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); }
+static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); +} + +static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2668,7 +2793,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; - list_add_tail(params->instrs, &coeff->entry); + hlsl_block_add_instr(params->instrs, coeff);
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) return false; @@ -2715,7 +2840,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer
if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) return false; @@ -2806,7 +2931,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, }
static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, - struct list *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *log, *mul; @@ -2861,15 +2986,15 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, init_value.u[3].f = 1.0f; if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) return false; - list_add_tail(params->instrs, &init->entry); + hlsl_block_add_instr(params->instrs, init);
if (!(store = hlsl_new_simple_store(ctx, var, init))) return false; - list_add_tail(params->instrs, &store->entry); + hlsl_block_add_instr(params->instrs, store);
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
/* Diffuse component. */ if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) @@ -2877,7 +3002,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx,
if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block);
/* Specular component. */ if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) @@ -2897,11 +3022,11 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx,
if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block);
if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &var_load->node.entry); + hlsl_block_add_instr(params->instrs, &var_load->node);
return true; } @@ -3034,10 +3159,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, { struct hlsl_ir_node *value1, *value2, *mul;
- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) + if (!(value1 = hlsl_add_load_component(ctx, params->instrs, + cast1, j * cast1->data_type->dimx + k, loc))) return false;
- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) + if (!(value2 = hlsl_add_load_component(ctx, params->instrs, + cast2, k * cast2->data_type->dimx + i, loc))) return false;
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) @@ -3056,13 +3183,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx,
if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block); } }
if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + hlsl_block_add_instr(params->instrs, &load->node);
return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); } @@ -3169,7 +3296,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx,
if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
/* Check if 0 < arg, cast bool to int */
@@ -3229,7 +3356,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) return false; - list_add_tail(params->instrs, &one->entry); + hlsl_block_add_instr(params->instrs, one);
if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) return false; @@ -3242,11 +3369,11 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) return false; - list_add_tail(params->instrs, &minus_two->entry); + hlsl_block_add_instr(params->instrs, minus_two);
if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) return false; - list_add_tail(params->instrs, &three->entry); + hlsl_block_add_instr(params->instrs, three);
if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) return false; @@ -3308,7 +3435,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
if (params->args_count == 4) { - hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); + hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); }
sampler_type = params->args[0]->data_type; @@ -3335,7 +3462,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(params->instrs, &load->entry); + hlsl_block_add_instr(params->instrs, load); return true; }
@@ -3369,7 +3496,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx,
if ((string = hlsl_type_to_string(ctx, arg_type))) hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", + "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", string->buffer); hlsl_release_string_buffer(ctx, string); return false; @@ -3377,7 +3504,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx,
if (arg_type->class == HLSL_CLASS_SCALAR) { - list_add_tail(params->instrs, &arg->entry); + hlsl_block_add_instr(params->instrs, arg); return true; }
@@ -3393,18 +3520,18 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_block block;
- if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) return false;
if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block); } }
if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &var_load->node.entry); + hlsl_block_add_instr(params->instrs, &var_load->node);
return true; } @@ -3444,13 +3571,13 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx,
if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) return false; - list_add_tail(params->instrs, &c->entry); + hlsl_block_add_instr(params->instrs, c);
if (arg_type->class == HLSL_CLASS_VECTOR) { if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) return false; - list_add_tail(params->instrs, &swizzle->entry); + hlsl_block_add_instr(params->instrs, swizzle);
arg = swizzle; } @@ -3458,7 +3585,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) return false;
- if (ctx->profile->major_version >= 4) + if (shader_profile_version_ge(ctx, 4, 0)) return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc);
return true; @@ -3482,10 +3609,15 @@ intrinsic_functions[] = {"asfloat", 1, true, intrinsic_asfloat}, {"asuint", -1, true, intrinsic_asuint}, {"clamp", 3, true, intrinsic_clamp}, + {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, + {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, + {"ddx_fine", 1, true, intrinsic_ddx_fine}, {"ddy", 1, true, intrinsic_ddy}, + {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, + {"ddy_fine", 1, true, intrinsic_ddy_fine}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, @@ -3527,7 +3659,14 @@ static int intrinsic_function_name_compare(const void *a, const void *b) return strcmp(a, func->name); }
-static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + + return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); +} + +static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, struct parse_initializer *args, const struct vkd3d_shader_location *loc) { struct intrinsic_function *intrinsic; @@ -3561,13 +3700,13 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
if (!(store = hlsl_new_simple_store(ctx, param, arg))) goto fail; - list_add_tail(args->instrs, &store->entry); + hlsl_block_add_instr(args->instrs, store); } }
if (!(call = hlsl_new_call(ctx, decl, loc))) goto fail; - list_add_tail(args->instrs, &call->entry); + hlsl_block_add_instr(args->instrs, call);
for (i = 0; i < decl->parameters.count; ++i) { @@ -3584,7 +3723,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) goto fail; - list_add_tail(args->instrs, &load->node.entry); + hlsl_block_add_instr(args->instrs, &load->node);
if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) goto fail; @@ -3597,16 +3736,15 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) goto fail; - list_add_tail(args->instrs, &load->node.entry); + hlsl_block_add_instr(args->instrs, &load->node); } else { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; struct hlsl_ir_node *expr;
- if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) + if (!(expr = hlsl_new_void_expr(ctx, loc))) goto fail; - list_add_tail(args->instrs, &expr->entry); + hlsl_block_add_instr(args->instrs, expr); } } else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), @@ -3662,7 +3800,7 @@ fail: return NULL; }
-static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, +static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_load *load; @@ -3692,7 +3830,7 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type
if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(params->instrs, &load->node.entry); + hlsl_block_add_instr(params->instrs, &load->node);
vkd3d_free(params->args); return params->instrs; @@ -3733,7 +3871,7 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct return false; }
-static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -3761,7 +3899,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru } if (multisampled) { - if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) return false; } @@ -3769,7 +3907,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru assert(offset_dim); if (params->args_count > 1 + multisampled) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -3779,7 +3917,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru }
/* +1 for the mipmap level for non-multisampled textures */ - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) return false;
@@ -3788,11 +3926,11 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load); return true; }
-static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -3829,13 +3967,13 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
if (offset_dim && params->args_count > 2) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -3851,12 +3989,12 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load);
return true; }
-static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -3899,17 +4037,17 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
- if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) load_params.cmp = params->args[2];
if (offset_dim && params->args_count > 3) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], /* args[2] is the comparison value; the optional offset follows it */ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -3925,12 +4063,12 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load);
return true; }
-static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -3997,7 +4135,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st } else if (offset_dim && params->args_count > 2) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -4022,7 +4160,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
@@ -4032,11 +4170,187 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load); + return true; +} + +static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, + struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *load; + + if (!dest) + return true; + + if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) + return false; + + if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) + return false; + return true; }
-static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + bool uint_resinfo, has_uint_arg, has_float_arg; + struct hlsl_resource_load_params load_params; + struct hlsl_ir_node *sample_info, *res_info; + struct hlsl_ir_node *zero = NULL, *void_ret; + struct hlsl_type *uint_type, *float_type; + unsigned int i, j; + enum func_argument + { + ARG_MIP_LEVEL, + ARG_WIDTH, + ARG_HEIGHT, + ARG_ELEMENT_COUNT, + ARG_LEVEL_COUNT, + ARG_SAMPLE_COUNT, + ARG_MAX_ARGS, + }; + struct hlsl_ir_node *args[ARG_MAX_ARGS] = { 0 }; + static const struct overload + { + enum hlsl_sampler_dim sampler_dim; + unsigned int args_count; + enum func_argument args[ARG_MAX_ARGS]; + } + overloads[] = + { + { HLSL_SAMPLER_DIM_1D, 1, { ARG_WIDTH } }, + { HLSL_SAMPLER_DIM_1D, 3, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_1DARRAY, 2, { ARG_WIDTH, ARG_ELEMENT_COUNT } }, + { HLSL_SAMPLER_DIM_1DARRAY, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_2D, 2, { ARG_WIDTH, ARG_HEIGHT } }, + { HLSL_SAMPLER_DIM_2D, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_2DARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, + { HLSL_SAMPLER_DIM_2DARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_3D, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, + { HLSL_SAMPLER_DIM_3D, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_CUBE, 2, { ARG_WIDTH, ARG_HEIGHT } }, + { HLSL_SAMPLER_DIM_CUBE, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, + { 
HLSL_SAMPLER_DIM_CUBEARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, + { HLSL_SAMPLER_DIM_CUBEARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_2DMS, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_SAMPLE_COUNT } }, + { HLSL_SAMPLER_DIM_2DMSARRAY, 4, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_SAMPLE_COUNT } }, + }; + const struct overload *o = NULL; + + if (object_type->sampler_dim > HLSL_SAMPLER_DIM_LAST_TEXTURE) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "GetDimensions() is not defined for this type."); + } + + uint_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); + float_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT); + has_uint_arg = has_float_arg = false; + for (i = 0; i < ARRAY_SIZE(overloads); ++i) + { + const struct overload *iter = &overloads[i]; + + if (iter->sampler_dim == object_type->sampler_dim && iter->args_count == params->args_count) + { + for (j = 0; j < params->args_count; ++j) + { + args[iter->args[j]] = params->args[j]; + + /* Input parameter. 
*/ + if (iter->args[j] == ARG_MIP_LEVEL) + { + if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) + { + return false; + } + + continue; + } + + has_float_arg |= hlsl_types_are_equal(params->args[j]->data_type, float_type); + has_uint_arg |= hlsl_types_are_equal(params->args[j]->data_type, uint_type); + + if (params->args[j]->data_type->class != HLSL_CLASS_SCALAR) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected scalar arguments."); + break; + } + } + o = iter; + break; + } + } + uint_resinfo = !has_float_arg && has_uint_arg; + + if (!o) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, object_type))) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Unexpected number of arguments %u for %s.%s().", params->args_count, string->buffer, name); + hlsl_release_string_buffer(ctx, string); + } + } + + if (!args[ARG_MIP_LEVEL]) + { + if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) + return false; + hlsl_block_add_instr(block, zero); + args[ARG_MIP_LEVEL] = zero; + } + + memset(&load_params, 0, sizeof(load_params)); + load_params.type = HLSL_RESOURCE_RESINFO; + load_params.resource = object; + load_params.lod = args[ARG_MIP_LEVEL]; + load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); + + if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + hlsl_block_add_instr(block, res_info); + + if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) + return false; + + if (!add_assignment_from_component(ctx, block, args[ARG_HEIGHT], res_info, 1, loc)) + return false; + + if (!add_assignment_from_component(ctx, block, args[ARG_ELEMENT_COUNT], res_info, + object_type->sampler_dim == HLSL_SAMPLER_DIM_1DARRAY ? 
1 : 2, loc)) + { + return false; + } + + if (!add_assignment_from_component(ctx, block, args[ARG_LEVEL_COUNT], res_info, 3, loc)) + return false; + + if (args[ARG_SAMPLE_COUNT]) + { + memset(&load_params, 0, sizeof(load_params)); + load_params.type = HLSL_RESOURCE_SAMPLE_INFO; + load_params.resource = object; + load_params.format = args[ARG_SAMPLE_COUNT]->data_type; + if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + hlsl_block_add_instr(block, sample_info); + + if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) + return false; + } + + if (!(void_ret = hlsl_new_void_expr(ctx, loc))) + return false; + hlsl_block_add_instr(block, void_ret); + + return true; +} + +static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -4078,17 +4392,17 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) load_params.coords = params->args[1];
- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) load_params.lod = params->args[2];
if (offset_dim && params->args_count > 3) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -4102,11 +4416,11 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load); return true; }
-static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -4145,21 +4459,21 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) load_params.coords = params->args[1];
- if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) load_params.ddx = params->args[2];
- if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], + if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) load_params.ddy = params->args[3];
if (offset_dim && params->args_count > 4) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -4173,14 +4487,14 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load); return true; }
static const struct method_function { const char *name; - bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); } object_methods[] = @@ -4191,6 +4505,8 @@ object_methods[] = { "GatherGreen", add_gather_method_call }, { "GatherRed", add_gather_method_call },
+ { "GetDimensions", add_getdimensions_method_call }, + { "Load", add_load_method_call },
{ "Sample", add_sample_method_call }, @@ -4208,7 +4524,7 @@ static int object_method_function_name_compare(const void *a, const void *b) return strcmp(a, func->name); }
-static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -4229,7 +4545,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), sizeof(*method), object_method_function_name_compare))) { - return method->handler(ctx, instrs, object, name, params, loc); + return method->handler(ctx, block, object, name, params, loc); } else { @@ -4272,6 +4588,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type char *name; DWORD modifiers; struct hlsl_ir_node *instr; + struct hlsl_block *block; struct list *list; struct parse_fields fields; struct parse_function function; @@ -4399,38 +4716,9 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token <intval> C_INTEGER %token <intval> PRE_LINE
-%type <list> add_expr -%type <list> assignment_expr -%type <list> bitand_expr -%type <list> bitor_expr -%type <list> bitxor_expr -%type <list> compound_statement -%type <list> conditional_expr -%type <list> declaration -%type <list> declaration_statement -%type <list> discard_statement -%type <list> equality_expr -%type <list> expr -%type <list> expr_optional -%type <list> expr_statement -%type <list> initializer_expr -%type <list> jump_statement -%type <list> logicand_expr -%type <list> logicor_expr -%type <list> loop_statement -%type <list> mul_expr -%type <list> postfix_expr -%type <list> primary_expr -%type <list> relational_expr -%type <list> selection_statement -%type <list> shift_expr -%type <list> statement -%type <list> statement_list -%type <list> struct_declaration %type <list> type_specs -%type <list> unary_expr %type <list> variables_def -%type <list> variables_def_optional +%type <list> variables_def_typed
%token <name> VAR_IDENTIFIER %token <name> NEW_IDENTIFIER @@ -4446,6 +4734,35 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <attr_list> attribute_list %type <attr_list> attribute_list_optional
+%type <block> add_expr +%type <block> assignment_expr +%type <block> bitand_expr +%type <block> bitor_expr +%type <block> bitxor_expr +%type <block> compound_statement +%type <block> conditional_expr +%type <block> declaration +%type <block> declaration_statement +%type <block> equality_expr +%type <block> expr +%type <block> expr_optional +%type <block> expr_statement +%type <block> initializer_expr +%type <block> jump_statement +%type <block> logicand_expr +%type <block> logicor_expr +%type <block> loop_statement +%type <block> mul_expr +%type <block> postfix_expr +%type <block> primary_expr +%type <block> relational_expr +%type <block> shift_expr +%type <block> selection_statement +%type <block> statement +%type <block> statement_list +%type <block> struct_declaration_without_vars +%type <block> unary_expr + %type <boolval> boolean
%type <buffer_type> buffer_type @@ -4493,6 +4810,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <variable_def> type_spec %type <variable_def> variable_decl %type <variable_def> variable_def +%type <variable_def> variable_def_typed
%%
@@ -4502,9 +4820,9 @@ hlsl_prog: | hlsl_prog buffer_declaration buffer_body | hlsl_prog declaration_statement { - if (!list_empty($2)) + if (!list_empty(&$2->instrs)) hlsl_fixme(ctx, &@2, "Uniform initializer."); - destroy_instr_list($2); + destroy_block($2); } | hlsl_prog preproc_directive | hlsl_prog ';' @@ -4561,25 +4879,19 @@ preproc_directive: } }
-struct_declaration: - var_modifiers struct_spec variables_def_optional ';' +struct_declaration_without_vars: + var_modifiers struct_spec ';' { - struct hlsl_type *type; - unsigned int modifiers = $1; + if (!$2->name) + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Anonymous struct type must declare a variable.");
- if (!$3) - { - if (!$2->name) - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Anonymous struct type must declare a variable."); - if (modifiers) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers are not allowed on struct type declarations."); - } + if ($1) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on struct type declarations.");
- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + if (!($$ = make_empty_block(ctx))) YYABORT; - $$ = declare_vars(ctx, type, modifiers, &@1, $3); }
struct_spec: @@ -4686,7 +4998,7 @@ attribute: YYABORT; } $$->name = $2; - list_init(&$$->instrs); + hlsl_block_init(&$$->instrs); $$->loc = @$; $$->args_count = 0; } @@ -4701,8 +5013,8 @@ attribute: YYABORT; } $$->name = $2; - list_init(&$$->instrs); - list_move_tail(&$$->instrs, $4.instrs); + hlsl_block_init(&$$->instrs); + hlsl_block_add_block(&$$->instrs, $4.instrs); vkd3d_free($4.instrs); $$->loc = @$; $$->args_count = $4.args_count; @@ -4758,15 +5070,15 @@ func_declaration: "Function "%s" is already defined.", decl->func->name); hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, ""%s" was previously defined here.", decl->func->name); - hlsl_free_instr_list($2); + destroy_block($2); } else { size_t i;
decl->has_body = true; - list_move_tail(&decl->body.instrs, $2); - vkd3d_free($2); + hlsl_block_add_block(&decl->body, $2); + destroy_block($2);
/* Semantics are taken from whichever definition has a body. * We can't just replace the hlsl_ir_var pointers, though: if @@ -4817,6 +5129,9 @@ func_prototype_no_attrs: struct hlsl_ir_var *var; struct hlsl_type *type;
+ /* Functions are unconditionally inlined. */ + modifiers &= ~HLSL_MODIFIER_INLINE; + if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Only majority modifiers are allowed on functions."); @@ -4943,7 +5258,7 @@ func_prototype: compound_statement: '{' '}' { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; } | '{' scope_start statement_list '}' @@ -5261,7 +5576,12 @@ type_no_void: { validate_texture_format_type(ctx, $3, &@3);
- /* TODO: unspecified sample count is not allowed for all targets */ + if (shader_profile_version_lt(ctx, 4, 1)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); + } + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); } | texture_ms_type '<' type ',' shift_expr '>' @@ -5270,7 +5590,7 @@ type_no_void: struct hlsl_block block;
hlsl_block_init(&block); - list_move_tail(&block.instrs, $5); + hlsl_block_add_block(&block, $5);
sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5);
@@ -5325,7 +5645,7 @@ type_no_void: $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); if ($$->is_minimum_precision) { - if (ctx->profile->major_version < 4) + if (shader_profile_version_lt(ctx, 4, 0)) { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Target profile doesn't support minimum-precision types."); @@ -5354,10 +5674,10 @@ type:
declaration_statement: declaration - | struct_declaration + | struct_declaration_without_vars | typedef { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; }
@@ -5416,22 +5736,11 @@ type_spec: }
declaration: - var_modifiers type variables_def ';' + variables_def_typed ';' { - struct hlsl_type *type; - unsigned int modifiers = $1; - - if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + if (!($$ = initialize_vars(ctx, $1))) YYABORT; - $$ = declare_vars(ctx, type, modifiers, &@1, $3); - } - -variables_def_optional: - %empty - { - $$ = NULL; } - | variables_def
variables_def: variable_def @@ -5446,6 +5755,33 @@ variables_def: list_add_tail($$, &$3->entry); }
+variables_def_typed: + variable_def_typed + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + list_add_head($$, &$1->entry); + + declare_var(ctx, $1); + } + | variables_def_typed ',' variable_def + { + struct parse_variable_def *head_def; + + assert(!list_empty($1)); + head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); + + assert(head_def->basic_type); + $3->basic_type = head_def->basic_type; + $3->modifiers = head_def->modifiers; + $3->modifiers_loc = head_def->modifiers_loc; + + declare_var(ctx, $3); + + $$ = $1; + list_add_tail($$, &$3->entry); + } + variable_decl: any_identifier arrays colon_attribute { @@ -5461,7 +5797,7 @@ state: any_identifier '=' expr ';' { vkd3d_free($1); - hlsl_free_instr_list($3); + destroy_block($3); }
state_block_start: @@ -5487,6 +5823,38 @@ variable_def: ctx->in_state_block = 0; }
+variable_def_typed: + var_modifiers struct_spec variable_def + { + unsigned int modifiers = $1; + struct hlsl_type *type; + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + + check_invalid_in_out_modifiers(ctx, modifiers, &@1); + + $$ = $3; + $$->basic_type = type; + $$->modifiers = modifiers; + $$->modifiers_loc = @1; + } + | var_modifiers type variable_def + { + unsigned int modifiers = $1; + struct hlsl_type *type; + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + + check_invalid_in_out_modifiers(ctx, modifiers, &@1); + + $$ = $3; + $$->basic_type = type; + $$->modifiers = modifiers; + $$->modifiers_loc = @1; + } + arrays: %empty { @@ -5495,17 +5863,12 @@ arrays: } | '[' expr ']' arrays { - struct hlsl_block block; uint32_t *new_array; unsigned int size;
- hlsl_clone_block(ctx, &block, &ctx->static_initializers); - list_move_tail(&block.instrs, $2); + size = evaluate_static_expression_as_uint(ctx, $2, &@2);
- size = evaluate_static_expression_as_uint(ctx, &block, &@2); - - hlsl_block_cleanup(&block); - vkd3d_free($2); + destroy_block($2);
$$ = $4;
@@ -5610,6 +5973,10 @@ var_modifiers: { $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); } + | KW_INLINE var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); + }
complex_initializer: @@ -5618,10 +5985,10 @@ complex_initializer: $$.args_count = 1; if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) { - destroy_instr_list($1); + destroy_block($1); YYABORT; } - $$.args[0] = node_from_list($1); + $$.args[0] = node_from_block($1); $$.instrs = $1; $$.braces = false; } @@ -5653,7 +6020,7 @@ complex_initializer_list: $$.args = new_args; for (i = 0; i < $3.args_count; ++i) $$.args[$$.args_count++] = $3.args[i]; - list_move_tail($$.instrs, $3.instrs); + hlsl_block_add_block($$.instrs, $3.instrs); free_parse_initializer(&$3); }
@@ -5666,10 +6033,10 @@ initializer_expr_list: $$.args_count = 1; if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) { - destroy_instr_list($1); + destroy_block($1); YYABORT; } - $$.args[0] = node_from_list($1); + $$.args[0] = node_from_block($1); $$.instrs = $1; $$.braces = false; } @@ -5681,13 +6048,13 @@ initializer_expr_list: if (!(new_args = hlsl_realloc(ctx, $$.args, ($$.args_count + 1) * sizeof(*$$.args)))) { free_parse_initializer(&$$); - destroy_instr_list($3); + destroy_block($3); YYABORT; } $$.args = new_args; - $$.args[$$.args_count++] = node_from_list($3); - list_move_tail($$.instrs, $3); - vkd3d_free($3); + $$.args[$$.args_count++] = node_from_block($3); + hlsl_block_add_block($$.instrs, $3); + destroy_block($3); }
boolean: @@ -5705,15 +6072,14 @@ statement_list: | statement_list statement { $$ = $1; - list_move_tail($$, $2); - vkd3d_free($2); + hlsl_block_add_block($$, $2); + destroy_block($2); }
statement: declaration_statement | expr_statement | compound_statement - | discard_statement | jump_statement | selection_statement | loop_statement @@ -5721,47 +6087,67 @@ statement: jump_statement: KW_RETURN expr ';' { - if (!add_return(ctx, $2, node_from_list($2), &@1)) - YYABORT; $$ = $2; + if (!add_return(ctx, $$, node_from_block($$), &@1)) + YYABORT; } | KW_RETURN ';' { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; if (!add_return(ctx, $$, NULL, &@1)) YYABORT; } - -discard_statement: - KW_DISCARD ';' + | KW_DISCARD ';' { - struct hlsl_ir_node *discard; + struct hlsl_ir_node *discard, *c;
- if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; - if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) + + if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) + YYABORT; + hlsl_block_add_instr($$, c); + + if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) - return false; - list_add_tail($$, &discard->entry); + YYABORT; + hlsl_block_add_instr($$, discard); }
selection_statement: - KW_IF '(' expr ')' if_body + attribute_list_optional KW_IF '(' expr ')' if_body { - struct hlsl_ir_node *condition = node_from_list($3); - struct hlsl_block then_block, else_block; + struct hlsl_ir_node *condition = node_from_block($4); + const struct parse_attribute_list *attributes = &$1; struct hlsl_ir_node *instr; + unsigned int i; + + if (attribute_list_has_duplicates(attributes)) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); + + for (i = 0; i < attributes->count; ++i) + { + const struct hlsl_attribute *attr = attributes->attrs[i];
- hlsl_block_init(&then_block); - list_move_tail(&then_block.instrs, $5.then_block); - hlsl_block_init(&else_block); - if ($5.else_block) - list_move_tail(&else_block.instrs, $5.else_block); - vkd3d_free($5.then_block); - vkd3d_free($5.else_block); + if (!strcmp(attr->name, "branch") + || !strcmp(attr->name, "flatten")) + { + hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name); + } + else + { + hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); + } + }
- if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) + if (!(instr = hlsl_new_if(ctx, condition, $6.then_block, $6.else_block, &@2))) + { + destroy_block($6.then_block); + destroy_block($6.else_block); YYABORT; + } + destroy_block($6.then_block); + destroy_block($6.else_block); if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { struct vkd3d_string_buffer *string; @@ -5771,8 +6157,8 @@ selection_statement: "if condition type %s is not scalar.", string->buffer); hlsl_release_string_buffer(ctx, string); } - $$ = $3; - list_add_tail($$, &instr->entry); + $$ = $4; + hlsl_block_add_instr($$, instr); }
if_body: @@ -5810,7 +6196,7 @@ loop_statement: expr_optional: %empty { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; } | expr @@ -5826,7 +6212,7 @@ func_arguments: { $$.args = NULL; $$.args_count = 0; - if (!($$.instrs = make_empty_list(ctx))) + if (!($$.instrs = make_empty_block(ctx))) YYABORT; $$.braces = false; } @@ -5839,7 +6225,7 @@ primary_expr:
if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, c))) + if (!($$ = make_block(ctx, c))) YYABORT; } | C_INTEGER @@ -5848,7 +6234,7 @@ primary_expr:
if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, c))) + if (!($$ = make_block(ctx, c))) YYABORT; } | boolean @@ -5857,7 +6243,7 @@ primary_expr:
if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, c))) + if (!($$ = make_block(ctx, c))) { hlsl_free_instr(c); YYABORT; @@ -5875,7 +6261,7 @@ primary_expr: } if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; - if (!($$ = make_list(ctx, &load->node))) + if (!($$ = make_block(ctx, &load->node))) YYABORT; } | '(' expr ')' @@ -5903,7 +6289,7 @@ primary_expr: YYABORT; if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; - if (!($$ = make_list(ctx, &load->node))) + if (!($$ = make_block(ctx, &load->node))) YYABORT; } else @@ -5919,7 +6305,7 @@ postfix_expr: { if (!add_increment(ctx, $1, false, true, &@2)) { - destroy_instr_list($1); + destroy_block($1); YYABORT; } $$ = $1; @@ -5928,14 +6314,14 @@ postfix_expr: { if (!add_increment(ctx, $1, true, true, &@2)) { - destroy_instr_list($1); + destroy_block($1); YYABORT; } $$ = $1; } | postfix_expr '.' any_identifier { - struct hlsl_ir_node *node = node_from_list($1); + struct hlsl_ir_node *node = node_from_block($1);
if (node->data_type->class == HLSL_CLASS_STRUCT) { @@ -5963,7 +6349,7 @@ postfix_expr: hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle "%s".", $3); YYABORT; } - list_add_tail($1, &swizzle->entry); + hlsl_block_add_instr($1, swizzle); $$ = $1; } else @@ -5974,17 +6360,17 @@ postfix_expr: } | postfix_expr '[' expr ']' { - struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); + struct hlsl_ir_node *array = node_from_block($1), *index = node_from_block($3);
- list_move_head($1, $3); - vkd3d_free($3); + hlsl_block_add_block($3, $1); + destroy_block($1);
- if (!add_array_access(ctx, $1, array, index, &@2)) + if (!add_array_access(ctx, $3, array, index, &@2)) { - destroy_instr_list($1); + destroy_block($3); YYABORT; } - $$ = $1; + $$ = $3; }
/* var_modifiers is necessary to avoid shift/reduce conflicts. */ @@ -6025,14 +6411,14 @@ postfix_expr: } | postfix_expr '.' any_identifier '(' func_arguments ')' { - struct hlsl_ir_node *object = node_from_list($1); + struct hlsl_ir_node *object = node_from_block($1);
- list_move_tail($1, $5.instrs); + hlsl_block_add_block($1, $5.instrs); vkd3d_free($5.instrs);
if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) { - hlsl_free_instr_list($1); + destroy_block($1); vkd3d_free($5.args); YYABORT; } @@ -6046,7 +6432,7 @@ unary_expr: { if (!add_increment(ctx, $2, false, false, &@1)) { - destroy_instr_list($2); + destroy_block($2); YYABORT; } $$ = $2; @@ -6055,7 +6441,7 @@ unary_expr: { if (!add_increment(ctx, $2, true, false, &@1)) { - destroy_instr_list($2); + destroy_block($2); YYABORT; } $$ = $2; @@ -6066,23 +6452,23 @@ unary_expr: } | '-' unary_expr { - add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); + add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_block($2), &@1); $$ = $2; } | '~' unary_expr { - add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_list($2), &@1); + add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_block($2), &@1); $$ = $2; } | '!' unary_expr { - add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_list($2), &@1); + add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_block($2), &@1); $$ = $2; } /* var_modifiers is necessary to avoid shift/reduce conflicts. */ | '(' var_modifiers type arrays ')' unary_expr { - struct hlsl_type *src_type = node_from_list($6)->data_type; + struct hlsl_type *src_type = node_from_block($6)->data_type; struct hlsl_type *dst_type; unsigned int i;
@@ -6118,9 +6504,9 @@ unary_expr: YYABORT; }
- if (!add_cast(ctx, $6, node_from_list($6), dst_type, &@3)) + if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) { - hlsl_free_instr_list($6); + destroy_block($6); YYABORT; } $$ = $6; @@ -6130,120 +6516,121 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); } | mul_expr '/' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); } | mul_expr '%' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); }
add_expr: mul_expr | add_expr '+' mul_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } | add_expr '-' mul_expr { struct hlsl_ir_node *neg;
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) + if (!(neg = add_unary_arithmetic_expr(ctx, $3, HLSL_OP1_NEG, node_from_block($3), &@2))) YYABORT; - list_add_tail($3, &neg->entry); - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); }
shift_expr: add_expr | shift_expr OP_LEFTSHIFT add_expr { - $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); } | shift_expr OP_RIGHTSHIFT add_expr { - $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); }
relational_expr: shift_expr | relational_expr '<' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); } | relational_expr '>' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); + $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); } | relational_expr OP_LE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); } | relational_expr OP_GE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); }
equality_expr: relational_expr | equality_expr OP_EQ relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); } | equality_expr OP_NE relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); }
bitand_expr: equality_expr | bitand_expr '&' equality_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); }
bitxor_expr: bitand_expr | bitxor_expr '^' bitand_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); }
bitor_expr: bitxor_expr | bitor_expr '|' bitxor_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); }
logicand_expr: bitor_expr | logicand_expr OP_AND bitor_expr { - $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); }
logicor_expr: logicand_expr | logicor_expr OP_OR logicand_expr { - $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); }
conditional_expr: logicor_expr | logicor_expr '?' expr ':' assignment_expr { - struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); + struct hlsl_ir_node *cond = node_from_block($1); + struct hlsl_ir_node *first = node_from_block($3); + struct hlsl_ir_node *second = node_from_block($5); struct hlsl_type *common_type;
- list_move_tail($1, $3); - list_move_tail($1, $5); - vkd3d_free($3); - vkd3d_free($5); + hlsl_block_add_block($1, $3); + hlsl_block_add_block($1, $5); + destroy_block($3); + destroy_block($5);
if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) YYABORT; @@ -6264,15 +6651,15 @@ assignment_expr: conditional_expr | unary_expr assign_op assignment_expr { - struct hlsl_ir_node *lhs = node_from_list($1), *rhs = node_from_list($3); + struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3);
if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); YYABORT; } - list_move_tail($3, $1); - vkd3d_free($1); + hlsl_block_add_block($3, $1); + destroy_block($1); if (!add_assignment(ctx, $3, lhs, $2, rhs)) YYABORT; $$ = $3; @@ -6329,6 +6716,6 @@ expr: | expr ',' assignment_expr { $$ = $1; - list_move_tail($$, $3); - vkd3d_free($3); + hlsl_block_add_block($$, $3); + destroy_block($3); } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 765b1907426..bae8e5f9a5f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -97,6 +97,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) { + enum hlsl_regset regset = hlsl_type_get_regset(deref->data_type); struct hlsl_ir_node *offset = NULL; struct hlsl_type *type; unsigned int i; @@ -111,7 +112,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st struct hlsl_block idx_block;
if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, - deref->offset_regset, loc))) + regset, loc))) return NULL;
hlsl_block_add_block(block, &idx_block); @@ -126,7 +127,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { - const struct hlsl_type *type; + struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block;
@@ -145,7 +146,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der return true; }
- deref->offset_regset = hlsl_type_get_regset(type); + deref->data_type = type;
if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) return false; @@ -160,7 +161,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der /* Split uniforms into two variables representing the constant and temp * registers, and copy the former to the latter, so that writes to uniforms * work. */ -static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) +static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) { struct vkd3d_string_buffer *name; struct hlsl_ir_var *uniform; @@ -187,7 +188,7 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru
if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) return; - list_add_head(instrs, &load->node.entry); + list_add_head(&block->instrs, &load->node.entry);
if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) return; @@ -300,7 +301,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir return ext_var; }
-static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, +static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; @@ -320,9 +321,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct if (!semantic->name) return;
- vector_type_src = hlsl_get_vector_type(ctx, type->base_type, - (ctx->profile->major_version < 4) ? 4 : hlsl_type_minor_size(type)); vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + vector_type_src = vector_type_dst; + if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4);
for (i = 0; i < hlsl_type_major_size(type); ++i) { @@ -363,7 +365,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct } }
-static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, +static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct vkd3d_shader_location *loc = &lhs->node.loc; @@ -405,30 +407,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs return; list_add_after(&c->entry, &element_load->node.entry);
- prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + prepend_input_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); } } else { - prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); + prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); } }
/* Split inputs into two variables representing the semantic and temp registers, * and copy the former to the latter, so that writes to input variables work. */ -static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) +static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) { struct hlsl_ir_load *load;
/* This redundant load is expected to be deleted later by DCE. */ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_head(instrs, &load->node.entry); + list_add_head(&block->instrs, &load->node.entry);
- prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); + prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); }
-static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, +static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = rhs->node.data_type, *vector_type; @@ -463,11 +465,11 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_tail(instrs, &c->entry); + hlsl_block_add_instr(block, c);
if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node); } else { @@ -475,16 +477,16 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) return; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node); }
if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) return; - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store); } }
-static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, +static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct vkd3d_shader_location *loc = &rhs->node.loc; @@ -519,34 +521,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs
if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_tail(instrs, &c->entry); + hlsl_block_add_instr(block, c);
if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) return; - list_add_tail(instrs, &element_load->node.entry); + hlsl_block_add_instr(block, &element_load->node);
- append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); } } else { - append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); + append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); } }
/* Split outputs into two variables representing the temp and semantic * registers, and copy the former to the latter, so that reads from output * variables work. */ -static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) +static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) { struct hlsl_ir_load *load;
/* This redundant load is expected to be deleted later by DCE. */ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node);
- append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); + append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); }
bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), @@ -573,6 +575,37 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, return progress; }
+typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *); + +static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + PFN_lower_func func = context; + struct hlsl_block block; + + hlsl_block_init(&block); + if (func(ctx, instr, &block)) + { + struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry); + + list_move_before(&instr->entry, &block.instrs); + hlsl_replace_node(instr, replacement); + return true; + } + else + { + hlsl_block_cleanup(&block); + return false; + } +} + +/* Specific form of transform_ir() for passes which convert a single instruction + * to a block of one or more instructions. This helper takes care of setting up + * the block and calling hlsl_replace_node_with_block(). */ +static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block) +{ + return hlsl_transform_ir(ctx, call_lower_func, block, func); +} + static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { bool res; @@ -666,7 +699,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, return; list_add_after(&cf_instr->entry, &load->node.entry);
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) return; hlsl_block_add_instr(&then_block, jump);
@@ -1689,7 +1722,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Loaded resource must have a single uniform source."); @@ -1704,7 +1737,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_
if (load->sampler.var) { - if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->sampler.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Resource load sampler must have a single uniform source."); @@ -1722,7 +1755,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!store->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Accessed resource must have a single uniform source."); @@ -1889,7 +1922,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
if (rhs->type != HLSL_IR_LOAD) { - hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); + hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); return false; }
@@ -2066,6 +2099,137 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir return false; }
+/* Lower combined samples and sampler variables to synthesized separated textures and samplers. + * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ +static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *var; + unsigned int i; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + case HLSL_RESOURCE_GATHER_RED: + case HLSL_RESOURCE_GATHER_GREEN: + case HLSL_RESOURCE_GATHER_BLUE: + case HLSL_RESOURCE_GATHER_ALPHA: + case HLSL_RESOURCE_RESINFO: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + case HLSL_RESOURCE_SAMPLE_GRAD: + case HLSL_RESOURCE_SAMPLE_INFO: + return false; + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + break; + } + if (load->sampler.var) + return false; + + if (!hlsl_type_is_resource(load->resource.var->data_type)) + { + hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); + return false; + } + + assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); + + if (!(name = hlsl_get_string_buffer(ctx))) + return false; + vkd3d_string_buffer_printf(name, "<resource>%s", load->resource.var->name); + + TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); + + if (!(var = hlsl_get_var(ctx->globals, name->buffer))) + { + struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim, + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); + + /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. 
*/ + struct hlsl_type *arr_type = load->resource.var->data_type; + for (i = 0; i < load->resource.path_len; ++i) + { + assert(arr_type->class == HLSL_CLASS_ARRAY); + texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); + arr_type = arr_type->e.array.type; + } + + if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc, false))) + { + hlsl_release_string_buffer(ctx, name); + return false; + } + var->is_uniform = 1; + var->is_separated_resource = true; + + list_add_tail(&ctx->extern_vars, &var->extern_entry); + } + hlsl_release_string_buffer(ctx, name); + + if (load->sampling_dim != var->data_type->sampler_dim) + { + hlsl_error(ctx, &load->node.loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, + "Cannot split combined samplers from "%s" if they have different usage dimensions.", + load->resource.var->name); + hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "First use as combined sampler is here."); + return false; + + } + + hlsl_copy_deref(ctx, &load->sampler, &load->resource); + load->resource.var = var; + assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); + assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); + + return true; +} + +static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add, + enum hlsl_regset regset) +{ + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry) + { + if (var->bind_count[regset] < to_add->bind_count[regset]) + { + list_add_before(&var->extern_entry, &to_add->extern_entry); + return; + } + } + + list_add_tail(list, &to_add->extern_entry); +} + +static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) +{ + struct list separated_resources; + struct hlsl_ir_var *var, *next; + + list_init(&separated_resources); + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) 
+ { + if (var->is_separated_resource) + { + list_remove(&var->extern_entry); + insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES); + } + } + + list_move_head(&ctx->extern_vars, &separated_resources); + + return false; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -2264,7 +2428,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; }
-struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { struct hlsl_block then_block, else_block; @@ -2290,18 +2454,18 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *ins
if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) return NULL; - list_add_tail(instrs, &iff->entry); + hlsl_block_add_instr(instrs, iff);
if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) return NULL; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(instrs, &load->node);
return &load->node; }
-static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; + struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; struct hlsl_type *type = instr->data_type, *utype; struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; @@ -2322,56 +2486,52 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) return false; - list_add_before(&instr->entry, &xor->entry); + hlsl_block_add_instr(block, xor);
for (i = 0; i < type->dimx; ++i) high_bit_value.u[i].u = 0x80000000; if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) return false; - list_add_before(&instr->entry, &high_bit->entry); + hlsl_block_add_instr(block, high_bit);
if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) return false; - list_add_before(&instr->entry, &and->entry); + hlsl_block_add_instr(block, and);
if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; - list_add_before(&instr->entry, &abs1->entry); + hlsl_block_add_instr(block, abs1);
if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->entry); + hlsl_block_add_instr(block, cast1);
if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &abs2->entry); + hlsl_block_add_instr(block, abs2);
if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->entry); + hlsl_block_add_instr(block, cast2);
if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) return false; - list_add_before(&instr->entry, &div->entry); + hlsl_block_add_instr(block, div);
if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->entry); + hlsl_block_add_instr(block, cast3);
if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; - list_add_before(&instr->entry, &neg->entry); + hlsl_block_add_instr(block, neg);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) - return false; - hlsl_replace_node(instr, cond); - - return true; + return hlsl_add_conditional(ctx, block, and, neg, cast3); }
-static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; + struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; struct hlsl_type *type = instr->data_type, *utype; struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; @@ -2394,45 +2554,41 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, high_bit_value.u[i].u = 0x80000000; if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) return false; - list_add_before(&instr->entry, &high_bit->entry); + hlsl_block_add_instr(block, high_bit);
if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) return false; - list_add_before(&instr->entry, &and->entry); + hlsl_block_add_instr(block, and);
if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; - list_add_before(&instr->entry, &abs1->entry); + hlsl_block_add_instr(block, abs1);
if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->entry); + hlsl_block_add_instr(block, cast1);
if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &abs2->entry); + hlsl_block_add_instr(block, abs2);
if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->entry); + hlsl_block_add_instr(block, cast2);
if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) return false; - list_add_before(&instr->entry, &div->entry); + hlsl_block_add_instr(block, div);
if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->entry); + hlsl_block_add_instr(block, cast3);
if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; - list_add_before(&instr->entry, &neg->entry); + hlsl_block_add_instr(block, neg);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) - return false; - hlsl_replace_node(instr, cond); - - return true; + return hlsl_add_conditional(ctx, block, and, neg, cast3); }
static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -2516,9 +2672,9 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void return false; }
-static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; + struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one, *mul3; struct hlsl_type *type = instr->data_type, *btype; struct hlsl_constant_value one_value; struct hlsl_ir_expr *expr; @@ -2539,47 +2695,100 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) return false; - list_add_before(&instr->entry, &mul1->entry); + hlsl_block_add_instr(block, mul1);
if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) return false; - list_add_before(&instr->entry, &neg1->entry); + hlsl_block_add_instr(block, neg1);
if (!(ge = hlsl_new_binary_expr(ctx, HLSL_OP2_GEQUAL, mul1, neg1))) return false; ge->data_type = btype; - list_add_before(&instr->entry, &ge->entry); + hlsl_block_add_instr(block, ge);
if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &neg2->entry); + hlsl_block_add_instr(block, neg2);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) + if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) return false;
for (i = 0; i < type->dimx; ++i) one_value.u[i].f = 1.0f; if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) return false; - list_add_before(&instr->entry, &one->entry); + hlsl_block_add_instr(block, one);
if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) return false; - list_add_before(&instr->entry, &div->entry); + hlsl_block_add_instr(block, div);
if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, div, arg1))) return false; - list_add_before(&instr->entry, &mul2->entry); + hlsl_block_add_instr(block, mul2);
if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) return false; - list_add_before(&instr->entry, &frc->entry); + hlsl_block_add_instr(block, frc);
- expr->op = HLSL_OP2_MUL; - hlsl_src_remove(&expr->operands[0]); - hlsl_src_remove(&expr->operands[1]); - hlsl_src_from_node(&expr->operands[0], frc); - hlsl_src_from_node(&expr->operands[1], cond); + if (!(mul3 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, frc, cond))) + return false; + hlsl_block_add_instr(block, mul3); + + return true; +} + +static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; + static const struct hlsl_constant_value zero_value; + struct hlsl_type *arg_type, *cmp_type; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + unsigned int i, count; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + + hlsl_block_init(&block); + + arg_type = jump->condition.node->data_type; + if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(&block, zero); + + operands[0] = jump->condition.node; + operands[1] = zero; + cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); + if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) + return false; + hlsl_block_add_instr(&block, cmp); + + if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(&block, bool_false); + + or = bool_false; + + count = hlsl_type_component_count(cmp_type); + for (i = 0; i < count; ++i) + { + if (!(load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc))) + return false; + + if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) + return NULL; + hlsl_block_add_instr(&block, or); + } + + list_move_tail(&instr->entry, &block.instrs); + hlsl_src_remove(&jump->condition); + 
hlsl_src_from_node(&jump->condition, or); + jump->type = HLSL_IR_JUMP_DISCARD_NZ;
return true; } @@ -2698,7 +2907,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) continue; regset = hlsl_type_get_regset(var->data_type);
- if (var->reg_reservation.reg_type && var->regs[regset].bind_count) + if (var->reg_reservation.reg_type && var->regs[regset].allocation_size) { if (var->reg_reservation.reg_type != get_regset_name(regset)) { @@ -2716,7 +2925,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) var->regs[regset].id = var->reg_reservation.reg_index; TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, var->reg_reservation.reg_index, var->reg_reservation.reg_type, - var->reg_reservation.reg_index + var->regs[regset].bind_count); + var->reg_reservation.reg_index + var->regs[regset].allocation_size); } } } @@ -2806,7 +3015,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop load->sampler.offset.node->last_read = last_read; }
- load->coords.node->last_read = last_read; + if (load->coords.node) + load->coords.node->last_read = last_read; if (load->texel_offset.node) load->texel_offset.node->last_read = last_read; if (load->lod.node) @@ -2848,8 +3058,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop index->idx.node->last_read = last_read; break; } - case HLSL_IR_CONSTANT: case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + if (jump->condition.node) + jump->condition.node->last_read = last_read; + break; + } + case HLSL_IR_CONSTANT: break; } } @@ -2966,7 +3183,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read);
ret.id = reg_idx; - ret.bind_count = 1; + ret.allocation_size = 1; ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.allocated = true; return ret; @@ -3002,7 +3219,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read);
ret.id = reg_idx; - ret.bind_count = align(reg_size, 4) / 4; + ret.allocation_size = align(reg_size, 4) / 4; ret.allocated = true; return ret; } @@ -3034,7 +3251,7 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
-static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_resource_load *load; struct hlsl_ir_var *var; @@ -3046,15 +3263,16 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n
load = hlsl_ir_resource_load(instr); var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false;
if (regset == HLSL_REGSET_SAMPLERS) { enum hlsl_sampler_dim dim;
assert(!load->sampler.var); - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false;
dim = var->objects_usage[regset][index].sampler_dim; if (dim != load->sampling_dim) @@ -3072,25 +3290,39 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n return false; } } - var->objects_usage[regset][index].used = true; - var->objects_usage[regset][index].sampler_dim = load->sampling_dim; } - else - { - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim;
- var->objects_usage[regset][index].used = true; - var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + return false; +}
- if (load->sampler.var) - { - var = load->sampler.var; - if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) - return false; +static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index;
- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; - } + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + var->bind_count[regset] = max(var->bind_count[regset], index + 1); + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; + var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); }
return false; @@ -3100,7 +3332,7 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) { struct hlsl_ir_var *var; struct hlsl_type *type; - unsigned int i, k; + unsigned int k;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -3108,12 +3340,10 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx)
for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) { - for (i = 0; i < type->reg_size[k]; ++i) - { - /* Samplers are only allocated until the last used one. */ - if (var->objects_usage[k][i].used) - var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; - } + bool is_separated = var->is_separated_resource; + + if (var->bind_count[k] > 0) + var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? var->bind_count[k] : type->reg_size[k]; } } } @@ -3192,10 +3422,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, } }
+static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) +{ + struct hlsl_constant_defs *defs = &ctx->constant_defs; + struct hlsl_constant_register *reg; + size_t i; + + for (i = 0; i < defs->count; ++i) + { + reg = &defs->regs[i]; + if (reg->index == (component_index / 4)) + { + reg->value.f[component_index % 4] = f; + return; + } + } + + if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs))) + return; + reg = &defs->regs[defs->count++]; + memset(reg, 0, sizeof(*reg)); + reg->index = component_index / 4; + reg->value.f[component_index % 4] = f; +} + static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct register_allocator *allocator) { - struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_ir_node *instr;
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) @@ -3206,66 +3459,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, { struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); const struct hlsl_type *type = instr->data_type; - unsigned int x, y, i, writemask, end_reg; - unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int x, i;
constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
- if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, - constant->reg.id + reg_size / 4, sizeof(*defs->values))) - return; - end_reg = constant->reg.id + reg_size / 4; - if (end_reg > defs->count) - { - memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); - defs->count = end_reg; - } - assert(type->class <= HLSL_CLASS_LAST_NUMERIC); + assert(type->dimy == 1); + assert(constant->reg.writemask);
- if (!(writemask = constant->reg.writemask)) - writemask = (1u << type->dimx) - 1; - - for (y = 0; y < type->dimy; ++y) + for (x = 0, i = 0; x < 4; ++x) { - for (x = 0, i = 0; x < 4; ++x) + const union hlsl_constant_value_component *value; + float f; + + if (!(constant->reg.writemask & (1u << x))) + continue; + value = &constant->value.u[i++]; + + switch (type->base_type) { - const union hlsl_constant_value_component *value; - float f; - - if (!(writemask & (1u << x))) - continue; - value = &constant->value.u[i++]; - - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - f = !!value->u; - break; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - f = value->f; - break; - - case HLSL_TYPE_INT: - f = value->i; - break; - - case HLSL_TYPE_UINT: - f = value->u; - break; - - case HLSL_TYPE_DOUBLE: - FIXME("Double constant.\n"); - return; - - default: - vkd3d_unreachable(); - } - defs->values[constant->reg.id + y].f[x] = f; + case HLSL_TYPE_BOOL: + f = !!value->u; + break; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + f = value->f; + break; + + case HLSL_TYPE_INT: + f = value->i; + break; + + case HLSL_TYPE_UINT: + f = value->u; + break; + + case HLSL_TYPE_DOUBLE: + FIXME("Double constant.\n"); + return; + + default: + vkd3d_unreachable(); } + + record_constant(ctx, constant->reg.id * 4 + x, f); }
break; @@ -3297,8 +3536,6 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct register_allocator allocator = {0}; struct hlsl_ir_var *var;
- allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform && var->last_read) @@ -3315,6 +3552,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi } }
+ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); + vkd3d_free(allocator.allocations); }
@@ -3410,7 +3649,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; - var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; + var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); @@ -3497,7 +3736,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) + if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) continue;
buffer = var1->buffer; @@ -3508,7 +3747,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) { unsigned int var1_reg_size, var2_reg_size;
- if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) + if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) continue;
if (var1 == var2 || var1->buffer != var2->buffer) @@ -3558,7 +3797,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) + if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) { if (var->is_param) var->buffer = ctx->params_buffer; @@ -3589,7 +3828,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) }
buffer->reg.id = buffer->reservation.reg_index; - buffer->reg.bind_count = 1; + buffer->reg.allocation_size = 1; buffer->reg.allocated = true; TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); } @@ -3599,7 +3838,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) ++index;
buffer->reg.id = index; - buffer->reg.bind_count = 1; + buffer->reg.allocation_size = 1; buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; @@ -3618,7 +3857,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) }
static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, - uint32_t index) + uint32_t index, bool allocated_only) { const struct hlsl_ir_var *var; unsigned int start, count; @@ -3632,11 +3871,14 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum * bound there even if the reserved vars aren't used. */ start = var->reg_reservation.reg_index; count = var->data_type->reg_size[regset]; + + if (!var->regs[regset].allocated && allocated_only) + continue; } else if (var->regs[regset].allocated) { start = var->regs[regset].id; - count = var->regs[regset].bind_count; + count = var->regs[regset].allocation_size; } else { @@ -3667,11 +3909,12 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - unsigned int count = var->regs[regset].bind_count; + unsigned int count = var->regs[regset].allocation_size;
if (count == 0) continue;
+ /* The variable was already allocated if it has a reservation. */ if (var->regs[regset].allocated) { const struct hlsl_ir_var *reserved_object, *last_reported = NULL; @@ -3690,7 +3933,10 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) { index = var->regs[regset].id + i;
- reserved_object = get_allocated_object(ctx, regset, index); + /* get_allocated_object() may return "var" itself, but we + * actually want that, otherwise we'll end up reporting the + * same conflict between the same two variables twice. */ + reserved_object = get_allocated_object(ctx, regset, index, true); if (reserved_object && reserved_object != var && reserved_object != last_reported) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, @@ -3709,7 +3955,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset)
while (available < count) { - if (get_allocated_object(ctx, regset, index)) + if (get_allocated_object(ctx, regset, index, false)) available = 0; else ++available; @@ -3853,6 +4099,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; + enum hlsl_regset regset; unsigned int size;
if (!offset_node) @@ -3869,8 +4116,9 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref return false;
*offset = hlsl_ir_constant(offset_node)->value.u[0].u; + regset = hlsl_type_get_regset(deref->data_type);
- size = deref->var->data_type->reg_size[deref->offset_regset]; + size = deref->var->data_type->reg_size[regset]; if (*offset >= size) { hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, @@ -3900,7 +4148,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
- assert(deref->offset_regset == HLSL_REGSET_NUMERIC); + assert(deref->data_type); + assert(deref->data_type->class <= HLSL_CLASS_LAST_NUMERIC);
ret.id += offset / 4;
@@ -4008,7 +4257,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) - prepend_uniform_copy(ctx, &body->instrs, var); + prepend_uniform_copy(ctx, body, var); }
for (i = 0; i < entry_func->parameters.count; ++i) @@ -4017,7 +4266,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { - prepend_uniform_copy(ctx, &body->instrs, var); + prepend_uniform_copy(ctx, body, var); } else { @@ -4033,9 +4282,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
if (var->storage_modifiers & HLSL_STORAGE_IN) - prepend_input_var_copy(ctx, &body->instrs, var); + prepend_input_var_copy(ctx, body, var); if (var->storage_modifiers & HLSL_STORAGE_OUT) - append_output_var_copy(ctx, &body->instrs, var); + append_output_var_copy(ctx, body, var); } } if (entry_func->return_var) @@ -4044,7 +4293,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name);
- append_output_var_copy(ctx, &body->instrs, entry_func->return_var); + append_output_var_copy(ctx, body, entry_func->return_var); }
for (i = 0; i < entry_func->attr_count; ++i) @@ -4062,6 +4311,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name);
+ if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do @@ -4075,10 +4328,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); hlsl_transform_ir(ctx, lower_int_dot, body, NULL); - hlsl_transform_ir(ctx, lower_int_division, body, NULL); - hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); + lower_ir(ctx, lower_int_division, body); + lower_ir(ctx, lower_int_modulus, body); hlsl_transform_ir(ctx, lower_int_abs, body, NULL); - hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); + lower_ir(ctx, lower_float_modulus, body); hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); do { @@ -4094,6 +4347,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); hlsl_transform_ir(ctx, lower_int_dot, body, NULL);
+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + sort_synthetic_separated_samplers_first(ctx); + if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); @@ -4107,9 +4367,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_abs, body, NULL); }
- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); - hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); - /* TODO: move forward, remove when no longer needed */ transform_derefs(ctx, replace_deref_path_with_offset, body); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 301113c8477..41a72ab6c0d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -80,7 +80,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return false; }
- for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (src->node.data_type->base_type) { @@ -152,6 +152,51 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; }
+static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, + "Indefinite logarithm result."); + } + dst->u[k].f = log2f(src->value.u[k].f); + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, + "Infinities and NaNs are not allowed by the shader model."); + } + break; + + case HLSL_TYPE_DOUBLE: + if (src->value.u[k].d < 0.0) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, + "Indefinite logarithm result."); + } + dst->u[k].d = log2(src->value.u[k].d); + break; + + default: + FIXME("Fold 'log2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { @@ -160,7 +205,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
assert(type == src->node.data_type->base_type);
- for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { @@ -186,6 +231,96 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; }
+static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (ctx->profile->major_version >= 4 && src->value.u[k].f == 0.0f) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + "Floating point division by zero."); + } + dst->u[k].f = 1.0f / src->value.u[k].f; + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + "Infinities and NaNs are not allowed by the shader model."); + } + break; + + case HLSL_TYPE_DOUBLE: + if (src->value.u[k].d == 0.0) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + "Floating point division by zero."); + } + dst->u[k].d = 1.0 / src->value.u[k].d; + break; + + default: + FIXME("Fold 'rcp' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + +static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, + "Imaginary square root result."); + } + dst->u[k].f = sqrtf(src->value.u[k].f); + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) + { + hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, + "Infinities and NaNs are not allowed by the shader model."); + } + break; + + case HLSL_TYPE_DOUBLE: + if (src->value.u[k].d < 0.0) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, + "Imaginary square root result."); + } + dst->u[k].d = sqrt(src->value.u[k].d); + break; + + default: + FIXME("Fold 'sqrt' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { @@ -195,7 +330,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { @@ -223,7 +358,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
-static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { enum hlsl_base_type type = dst_type->base_type; @@ -232,65 +367,132 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; break;
- case HLSL_TYPE_DOUBLE: - dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; + default: + FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; +} + +static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; break;
+ default: + FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; +} + +static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; + dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; break;
default: - FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
- assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx);
- for (k = 0; k < 4; ++k) + dst->u[0].f = 0.0f; + for (k = 0; k < src1->node.data_type->dimx; ++k) { - switch (src1->node.data_type->base_type) + switch (type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; break; + default: + FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + }
- case HLSL_TYPE_DOUBLE: - dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; - break; + return true; +}
- case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; - break; +static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k;
+ assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(type == src3->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + assert(src3->node.data_type->dimx == 1); + + dst->u[0].f = src3->value.u[0].f; + for (k = 0; k < src1->node.data_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + break; default: - vkd3d_unreachable(); + FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; } - - dst->u[k].u *= ~0u; } + return true; }
@@ -363,45 +565,116 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
-static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, - const struct vkd3d_shader_location *loc) +static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; unsigned int k;
- assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type);
for (k = 0; k < dst_type->dimx; ++k) { - switch (type) + switch (src1->node.data_type->base_type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; + break; + case HLSL_TYPE_INT: - if (src2->value.u[k].i == 0) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); - return false; - } - if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) - dst->u[k].i = 0; - else - dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; + } + return true; +} + +static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + unsigned int k; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (src1->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: + dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; break;
case HLSL_TYPE_UINT: - if (src2->value.u[k].u == 0) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); - return false; - } - dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; break;
default: - FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; + vkd3d_unreachable(); } + + dst->u[k].u *= ~0u; + } + return true; +} + +static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + unsigned int k; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (src1->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: + dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; } return true; } @@ -419,6 +692,15 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); + break; + case HLSL_TYPE_INT: dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); break; @@ -448,6 +730,15 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); + break; + case HLSL_TYPE_INT: dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); break; @@ -464,8 +755,9 @@ static bool fold_min(struct hlsl_ctx 
*ctx, struct hlsl_constant_value *dst, cons return true; }
-static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { enum hlsl_base_type type = dst_type->base_type; unsigned int k; @@ -478,19 +770,35 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, switch (type) { case HLSL_TYPE_INT: + if (src2->value.u[k].i == 0) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); + return false; + } + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = 0; + else + dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; + break; + case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; + if (src2->value.u[k].u == 0) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); + return false; + } + dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; break;
default: - FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { enum hlsl_base_type type = dst_type->base_type; @@ -503,48 +811,67 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; + break; + case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; + dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; break;
default: - FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; unsigned int k;
- assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type);
for (k = 0; k < dst_type->dimx; ++k) { - switch (type) + switch (src1->node.data_type->base_type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; + break; + case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; break;
default: - FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; + vkd3d_unreachable(); } + + dst->u[k].u *= ~0u; } return true; }
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL; + struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; struct hlsl_constant_value res = {0}; struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; @@ -572,6 +899,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node); + if (expr->operands[2].node) + arg3 = hlsl_ir_constant(expr->operands[2].node);
switch (expr->op) { @@ -583,28 +912,58 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_cast(ctx, &res, instr->data_type, arg1); break;
+ case HLSL_OP1_LOG2: + success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + case HLSL_OP1_NEG: success = fold_neg(ctx, &res, instr->data_type, arg1); break;
+ case HLSL_OP1_RCP: + success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + + case HLSL_OP1_SQRT: + success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + case HLSL_OP2_ADD: success = fold_add(ctx, &res, instr->data_type, arg1, arg2); break;
- case HLSL_OP2_MUL: - success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_BIT_AND: + case HLSL_OP2_LOGIC_AND: + success = fold_and(ctx, &res, instr->data_type, arg1, arg2); break;
- case HLSL_OP2_NEQUAL: - success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_BIT_OR: + case HLSL_OP2_LOGIC_OR: + success = fold_or(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: + success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_DOT: + success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_DIV: success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break;
- case HLSL_OP2_MOD: - success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); + case HLSL_OP2_EQUAL: + success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_GEQUAL: + success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_LESS: + success = fold_less(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_MAX: @@ -615,16 +974,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_min(ctx, &res, instr->data_type, arg1, arg2); break;
- case HLSL_OP2_BIT_XOR: - success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_MOD: + success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break;
- case HLSL_OP2_BIT_AND: - success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_MUL: + success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); break;
- case HLSL_OP2_BIT_OR: - success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_NEQUAL: + success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP3_DP2ADD: + success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); break;
default: diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 9eefb82c226..705905f7888 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -85,6 +85,72 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); }
+static const struct vkd3d_shader_varying_map *find_varying_map( + const struct vkd3d_shader_next_stage_info *next_stage, unsigned int signature_idx) +{ + unsigned int i; + + for (i = 0; i < next_stage->varying_count; ++i) + { + if (next_stage->varying_map[i].output_signature_index == signature_idx) + return &next_stage->varying_map[i]; + } + + return NULL; +} + +static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info) +{ + struct shader_signature *signature = &parser->shader_desc.output_signature; + const struct vkd3d_shader_next_stage_info *next_stage; + unsigned int i; + + if (!(next_stage = vkd3d_find_struct(compile_info->next, NEXT_STAGE_INFO))) + return VKD3D_OK; + + for (i = 0; i < signature->element_count; ++i) + { + const struct vkd3d_shader_varying_map *map = find_varying_map(next_stage, i); + struct signature_element *e = &signature->elements[i]; + + if (map) + { + unsigned int input_mask = map->input_mask; + + e->target_location = map->input_register_index; + + /* It is illegal in Vulkan if the next shader uses the same varying + * location with a different mask. 
*/ + if (input_mask && input_mask != e->mask) + { + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "Output mask %#x does not match input mask %#x.", + e->mask, input_mask); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + } + else + { + e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; + } + } + + for (i = 0; i < next_stage->varying_count; ++i) + { + if (next_stage->varying_map[i].output_signature_index >= signature->element_count) + { + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "The next stage consumes varyings not written by this stage."); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + } + + return VKD3D_OK; +} + struct hull_flattener { struct vkd3d_shader_instruction_array instructions; @@ -247,13 +313,13 @@ static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_s reg->immconst_type = VKD3D_IMMCONST_SCALAR; }
-static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) +void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) { memset(ins, 0, sizeof(*ins)); ins->handler_idx = handler_idx; }
-enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) +static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) { struct hull_flattener flattener = {*src_instructions}; struct vkd3d_shader_instruction_array *instructions; @@ -388,7 +454,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p return VKD3D_OK; }
-enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( +static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) { struct vkd3d_shader_instruction_array *instructions; @@ -999,7 +1065,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi shader_instruction_init(ins, VKD3DSIH_NOP); }
-enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, +static enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) { @@ -1070,3 +1136,159 @@ enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_i *instructions = normaliser.instructions; return VKD3D_OK; } + +struct flat_constant_def +{ + enum vkd3d_shader_d3dbc_constant_register set; + uint32_t index; + uint32_t value[4]; +}; + +struct flat_constants_normaliser +{ + struct vkd3d_shader_parser *parser; + struct flat_constant_def *defs; + size_t def_count, defs_capacity; +}; + +static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, + enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) +{ + static const struct + { + enum vkd3d_shader_register_type type; + enum vkd3d_shader_d3dbc_constant_register set; + uint32_t offset; + } + regs[] = + { + {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, + {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, + {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, + {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, + {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, + {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, + }; + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(regs); ++i) + { + if (reg->type == regs[i].type) + { + if (reg->idx[0].rel_addr) + { + FIXME("Unhandled relative address.\n"); + return false; + } + + *set = regs[i].set; + *index = regs[i].offset + reg->idx[0].offset; + return true; + } + } + + return false; +} + +static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param, + const 
struct flat_constants_normaliser *normaliser) +{ + enum vkd3d_shader_d3dbc_constant_register set; + uint32_t index; + size_t i, j; + + if (!get_flat_constant_register_type(&param->reg, &set, &index)) + return; + + for (i = 0; i < normaliser->def_count; ++i) + { + if (normaliser->defs[i].set == set && normaliser->defs[i].index == index) + { + param->reg.type = VKD3DSPR_IMMCONST; + param->reg.idx_count = 0; + param->reg.immconst_type = VKD3D_IMMCONST_VEC4; + for (j = 0; j < 4; ++j) + param->reg.u.immconst_uint[j] = normaliser->defs[i].value[j]; + return; + } + } + + param->reg.type = VKD3DSPR_CONSTBUFFER; + param->reg.idx[0].offset = set; /* register ID */ + param->reg.idx[1].offset = set; /* register index */ + param->reg.idx[2].offset = index; /* buffer index */ + param->reg.idx_count = 3; +} + +static enum vkd3d_result instruction_array_normalise_flat_constants(struct vkd3d_shader_parser *parser) +{ + struct flat_constants_normaliser normaliser = {.parser = parser}; + unsigned int i, j; + + for (i = 0; i < parser->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; + + if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) + { + struct flat_constant_def *def; + + if (!vkd3d_array_reserve((void **)&normaliser.defs, &normaliser.defs_capacity, + normaliser.def_count + 1, sizeof(*normaliser.defs))) + { + vkd3d_free(normaliser.defs); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + def = &normaliser.defs[normaliser.def_count++]; + + get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); + for (j = 0; j < 4; ++j) + def->value[j] = ins->src[0].reg.u.immconst_uint[j]; + + vkd3d_shader_instruction_make_nop(ins); + } + else + { + for (j = 0; j < ins->src_count; ++j) + shader_register_normalise_flat_constants((struct vkd3d_shader_src_param *)&ins->src[j], &normaliser); + } + } + + vkd3d_free(normaliser.defs); + return 
VKD3D_OK; +} + +enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info) +{ + struct vkd3d_shader_instruction_array *instructions = &parser->instructions; + enum vkd3d_result result = VKD3D_OK; + + if (parser->shader_desc.is_dxil) + return result; + + if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL + && (result = remap_output_signature(parser, compile_info)) < 0) + return result; + + if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL + && (result = instruction_array_flatten_hull_shader_phases(instructions)) >= 0) + { + result = instruction_array_normalise_hull_shader_control_point_io(instructions, + &parser->shader_desc.input_signature); + } + if (result >= 0) + result = instruction_array_normalise_io_registers(instructions, parser->shader_version.type, + &parser->shader_desc.input_signature, &parser->shader_desc.output_signature, + &parser->shader_desc.patch_constant_signature); + + if (result >= 0) + result = instruction_array_normalise_flat_constants(parser); + + if (result >= 0 && TRACE_ON()) + vkd3d_shader_trace(instructions, &parser->shader_version); + + return result; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 94079696280..6fb61eff6c3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -30,6 +30,13 @@
#define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner)
+static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) +{ + if (!ctx->expansion_count) + return NULL; + return ctx->expansion_stack[ctx->expansion_count - 1].macro; +} + static void update_location(struct preproc_ctx *ctx);
#define YY_USER_ACTION update_location(yyget_extra(yyscanner)); @@ -125,7 +132,20 @@ INT_SUFFIX [uUlL]{0,2} const char *p;
if (!ctx->last_was_newline) - return T_HASHSTRING; + { + struct preproc_macro *macro; + + /* Stringification is only done for function-like macro bodies. + * Anywhere else, we need to parse it as two separate tokens. + * We could use a state for this, but yyless() is easier and cheap. + */ + + if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) + return T_HASHSTRING; + + yyless(1); + return T_TEXT; + }
for (p = yytext + 1; strchr(" \t", *p); ++p) ; @@ -219,13 +239,6 @@ static bool preproc_is_writing(struct preproc_ctx *ctx) return file->if_stack[file->if_count - 1].current_true; }
-static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) -{ - if (!ctx->expansion_count) - return NULL; - return ctx->expansion_stack[ctx->expansion_count - 1].macro; -} - /* Concatenation is not done for object-like macros, but is done for both * function-like macro bodies and their arguments. */ static bool should_concat(struct preproc_ctx *ctx) @@ -334,6 +347,43 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, return true; }
+static void preproc_stringify(struct preproc_ctx *ctx, struct vkd3d_string_buffer *buffer, const char *text) +{ + const struct preproc_text *expansion; + const char *p = text + 1; + unsigned int i; + + while (*p == ' ' || *p == '\t') + ++p; + + vkd3d_string_buffer_printf(buffer, "\""); + if ((expansion = find_arg_expansion(ctx, p))) + { + size_t len = expansion->text.content_size; + size_t start = 0; + + while (len && strchr(" \t\r\n", expansion->text.buffer[len - 1])) + --len; + + while (start < len && strchr(" \t\r\n", expansion->text.buffer[start])) + ++start; + + for (i = start; i < len; ++i) + { + char c = expansion->text.buffer[i]; + + if (c == '\\' || c == '"') + vkd3d_string_buffer_printf(buffer, "\\"); + vkd3d_string_buffer_printf(buffer, "%c", c); + } + } + else + { + vkd3d_string_buffer_printf(buffer, "%s", p); + } + vkd3d_string_buffer_printf(buffer, "\""); +} + int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) { struct preproc_ctx *ctx = yyget_extra(scanner); @@ -441,9 +491,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) switch (func_state->state) { case STATE_NONE: - { - struct preproc_macro *macro; - if (token == T_CONCAT && should_concat(ctx)) { while (ctx->buffer.content_size @@ -452,37 +499,17 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) break; }
- /* Stringification, however, is only done for function-like - * macro bodies. */ - if (token == T_HASHSTRING && (macro = preproc_get_top_macro(ctx)) && macro->arg_count) + if (token == T_HASHSTRING) { - const struct preproc_text *expansion; - const char *p = text + 1; - unsigned int i; + struct vkd3d_string_buffer buffer;
if (ctx->current_directive) return return_token(token, lval, text);
- while (*p == ' ' || *p == '\t') - ++p; - - vkd3d_string_buffer_printf(&ctx->buffer, "\""); - if ((expansion = find_arg_expansion(ctx, p))) - { - for (i = 0; i < expansion->text.content_size; ++i) - { - char c = expansion->text.buffer[i]; - - if (c == '\\' || c == '"') - vkd3d_string_buffer_printf(&ctx->buffer, "\\"); - vkd3d_string_buffer_printf(&ctx->buffer, "%c", c); - } - } - else - { - vkd3d_string_buffer_printf(&ctx->buffer, "%s", p); - } - vkd3d_string_buffer_printf(&ctx->buffer, "\""); + vkd3d_string_buffer_init(&buffer); + preproc_stringify(ctx, &buffer, text); + vkd3d_string_buffer_printf(&ctx->buffer, "%s", buffer.buffer); + vkd3d_string_buffer_cleanup(&buffer); break; }
@@ -586,7 +613,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) else vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text); break; - }
case STATE_IDENTIFIER: if (token == '(') @@ -628,6 +654,41 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
switch (token) { + /* Most text gets left alone (e.g. if it contains macros, + * the macros should be evaluated later). + * Arguments are a special case, and are replaced with + * their values immediately. */ + case T_IDENTIFIER: + case T_IDENTIFIER_PAREN: + { + const struct preproc_text *expansion; + + if ((expansion = find_arg_expansion(ctx, text))) + { + preproc_push_expansion(ctx, expansion, NULL); + continue; + } + + if (current_arg) + preproc_text_add(current_arg, text); + break; + } + + /* Stringification is another special case. Unsurprisingly, + * we need to stringify if this is an argument. More + * surprisingly, we need to stringify even if it's not. */ + case T_HASHSTRING: + { + struct vkd3d_string_buffer buffer; + + vkd3d_string_buffer_init(&buffer); + preproc_stringify(ctx, &buffer, text); + if (current_arg) + preproc_text_add(current_arg, buffer.buffer); + vkd3d_string_buffer_cleanup(&buffer); + break; + } + case T_NEWLINE: if (current_arg) preproc_text_add(current_arg, " "); @@ -686,6 +747,9 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) if (current_arg) preproc_text_add(current_arg, text); } + + if (current_arg) + preproc_text_add(current_arg, " "); break; } } diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index bfe5272fd29..eb8125b0e55 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -199,6 +199,16 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d } }
+static inline bool register_is_undef(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_UNDEF; +} + +static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) +{ + return register_is_constant(reg) || register_is_undef(reg); +} + #define VKD3D_SPIRV_VERSION 0x00010000 #define VKD3D_SPIRV_GENERATOR_ID 18 #define VKD3D_SPIRV_GENERATOR_VERSION 8 @@ -1746,6 +1756,38 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, } }
+static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, + enum vkd3d_data_type data_type, unsigned int component_count) +{ + uint32_t scalar_id; + + if (component_count == 1) + { + switch (data_type) + { + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_SNORM: + case VKD3D_DATA_UNORM: + return vkd3d_spirv_get_op_type_float(builder, 32); + break; + case VKD3D_DATA_INT: + case VKD3D_DATA_UINT: + return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); + break; + case VKD3D_DATA_DOUBLE: + return vkd3d_spirv_get_op_type_float(builder, 64); + default: + FIXME("Unhandled data type %#x.\n", data_type); + return 0; + } + } + else + { + scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); + return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); + } +} + static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) { vkd3d_spirv_stream_init(&builder->debug_stream); @@ -2263,7 +2305,7 @@ struct spirv_compiler
uint32_t binding_idx;
- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; unsigned int input_control_point_count; unsigned int output_control_point_count; bool use_vocp; @@ -2333,7 +2375,7 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler)
static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) { const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; @@ -2429,13 +2471,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve
compiler->shader_type = shader_version->type;
- compiler->input_signature = shader_desc->input_signature; - compiler->output_signature = shader_desc->output_signature; - compiler->patch_constant_signature = shader_desc->patch_constant_signature; - memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); - memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); - memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); @@ -2536,13 +2571,13 @@ static bool spirv_compiler_check_shader_visibility(const struct spirv_compiler * }
static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_constant_buffer( - const struct spirv_compiler *compiler, const struct vkd3d_shader_constant_buffer *cb) + const struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) { - unsigned int register_space = cb->range.space; - unsigned int reg_idx = cb->range.first; + unsigned int register_space = range->space; + unsigned int reg_idx = range->first; unsigned int i;
- if (cb->range.first != cb->range.last) + if (range->first != range->last) return NULL;
for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) @@ -2559,8 +2594,8 @@ static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_const return NULL; }
-static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *compiler, - const struct vkd3d_shader_resource *resource, const struct vkd3d_shader_sampler *sampler) +static bool spirv_compiler_has_combined_sampler_for_resource(const struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range) { const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; const struct vkd3d_shader_combined_resource_sampler *combined_sampler; @@ -2569,10 +2604,35 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com if (!shader_interface->combined_sampler_count) return false;
- if (resource && (resource->reg.reg.type == VKD3DSPR_UAV || resource->range.last != resource->range.first)) + if (range->last != range->first) + return false; + + for (i = 0; i < shader_interface->combined_sampler_count; ++i) + { + combined_sampler = &shader_interface->combined_samplers[i]; + + if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) + continue; + + if ((combined_sampler->resource_space == range->space + && combined_sampler->resource_index == range->first)) + return true; + } + + return false; +} + +static bool spirv_compiler_has_combined_sampler_for_sampler(const struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range) +{ + const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; + const struct vkd3d_shader_combined_resource_sampler *combined_sampler; + unsigned int i; + + if (!shader_interface->combined_sampler_count) return false;
- if (sampler && sampler->range.first != sampler->range.last) + if (range->last != range->first) return false;
for (i = 0; i < shader_interface->combined_sampler_count; ++i) @@ -2582,10 +2642,8 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) continue;
- if ((!resource || (combined_sampler->resource_space == resource->range.space - && combined_sampler->resource_index == resource->range.first)) - && (!sampler || (combined_sampler->sampler_space == sampler->range.space - && combined_sampler->sampler_index == sampler->range.first))) + if (combined_sampler->sampler_space == range->space + && combined_sampler->sampler_index == range->first) return true; }
@@ -2603,6 +2661,16 @@ static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_error(struct spirv_compiler * compiler->failed = true; }
+static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_warning(struct spirv_compiler *compiler, + enum vkd3d_shader_error error, const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_vwarning(compiler->message_context, &compiler->location, error, format, args); + va_end(args); +} + static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) { @@ -3211,13 +3279,13 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil struct vkd3d_symbol reg_symbol, *symbol; struct rb_entry *entry;
- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + assert(!register_is_constant_or_undef(reg));
if (reg->type == VKD3DSPR_TEMP) { assert(reg->idx[0].offset < compiler->temp_count); register_info->id = compiler->temp_id + reg->idx[0].offset; - register_info->storage_class = SpvStorageClassFunction; + register_info->storage_class = SpvStorageClassPrivate; register_info->descriptor_array = NULL; register_info->member_idx = 0; register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; @@ -3553,6 +3621,19 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi vkd3d_component_type_from_data_type(reg->data_type), component_count, values); }
+static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD write_mask) +{ + unsigned int component_count = vkd3d_write_mask_component_count(write_mask); + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id; + + assert(reg->type == VKD3DSPR_UNDEF); + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); + return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); +} + static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask, const struct vkd3d_shader_register_info *reg_info) @@ -3563,7 +3644,7 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, enum vkd3d_shader_component_type component_type; unsigned int skipped_component_mask;
- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + assert(!register_is_constant_or_undef(reg)); assert(vkd3d_write_mask_component_count(write_mask) == 1);
component_idx = vkd3d_write_mask_get_component_idx(write_mask); @@ -3615,6 +3696,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); else if (reg->type == VKD3DSPR_IMMCONST64) return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); + else if (reg->type == VKD3DSPR_UNDEF) + return spirv_compiler_emit_load_undef(compiler, reg, write_mask);
component_count = vkd3d_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type); @@ -3827,7 +3910,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, unsigned int src_write_mask = write_mask; uint32_t type_id;
- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + assert(!register_is_constant_or_undef(reg));
if (!spirv_compiler_get_register_info(compiler, reg, &reg_info)) return; @@ -3998,6 +4081,11 @@ static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationNoPerspective, NULL, 0); + vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); + break; default: FIXME("Unhandled interpolation mode %#x.\n", mode); break; @@ -4542,7 +4630,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, } else { - unsigned int location = signature_element->register_index; + unsigned int location = signature_element->target_location;
input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class, component_type, input_component_count, array_sizes, 2); @@ -4918,9 +5006,15 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler,
spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); } + else if (signature_element->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + { + storage_class = SpvStorageClassPrivate; + id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, output_component_count, array_sizes, 2); + } else { - unsigned int location = signature_element->register_index; + unsigned int location = signature_element->target_location;
if (is_patch_constant) location += shader_signature_next_location(&compiler->output_signature); @@ -4929,10 +5023,10 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, storage_class, component_type, output_component_count, array_sizes, 2); vkd3d_spirv_add_iface_variable(builder, id);
- if (is_dual_source_blending(compiler) && signature_element->register_index < 2) + if (is_dual_source_blending(compiler) && location < 2) { vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, location); } else { @@ -5258,8 +5352,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler WARN("Unhandled global flags %#x.\n", flags); }
-static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; size_t function_location; @@ -5270,11 +5363,11 @@ static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, vkd3d_spirv_begin_function_stream_insertion(builder, function_location);
assert(!compiler->temp_count); - compiler->temp_count = instruction->declaration.count; + compiler->temp_count = count; for (i = 0; i < compiler->temp_count; ++i) { - id = spirv_compiler_emit_variable(compiler, &builder->function_stream, - SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + id = spirv_compiler_emit_variable(compiler, &builder->global_stream, + SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); if (!i) compiler->temp_id = id; assert(id == compiler->temp_id + i); @@ -5473,28 +5566,31 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * return var_id; }
-static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size) { - const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; const SpvStorageClass storage_class = SpvStorageClassUniform; - const struct vkd3d_shader_register *reg = &cb->src.reg; struct vkd3d_push_constant_buffer_binding *push_cb; struct vkd3d_descriptor_variable_info var_info; struct vkd3d_symbol reg_symbol;
- assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); + struct vkd3d_shader_register reg = + { + .type = VKD3DSPR_CONSTBUFFER, + .idx[0].offset = register_id, + .idx_count = 1, + };
- if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, cb))) + if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, range))) { /* Push constant buffers are handled in * spirv_compiler_emit_push_constant_buffers(). */ - unsigned int cb_size_in_bytes = cb->size * VKD3D_VEC4_SIZE * sizeof(uint32_t); - push_cb->reg = *reg; - push_cb->size = cb->size; + unsigned int cb_size_in_bytes = size * VKD3D_VEC4_SIZE * sizeof(uint32_t); + push_cb->reg = reg; + push_cb->size = size; if (cb_size_in_bytes > push_cb->pc.size) { WARN("Constant buffer size %u exceeds push constant size %u.\n", @@ -5504,19 +5600,19 @@ static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compi }
vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - length_id = spirv_compiler_get_constant_uint(compiler, cb->size); + length_id = spirv_compiler_get_constant_uint(compiler, size); array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16);
struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); - vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", cb->size); + vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size);
var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, - reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); + &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info);
- vkd3d_symbol_make_register(&reg_symbol, reg); + vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); reg_symbol.descriptor_array = var_info.array_symbol; @@ -5557,29 +5653,34 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi spirv_compiler_put_symbol(compiler, &reg_symbol); }
-static void spirv_compiler_emit_dcl_sampler(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range, unsigned int register_id) { - const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; const SpvStorageClass storage_class = SpvStorageClassUniformConstant; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_register *reg = &sampler->src.reg; struct vkd3d_descriptor_variable_info var_info; struct vkd3d_symbol reg_symbol; uint32_t type_id, var_id;
- vkd3d_symbol_make_sampler(&reg_symbol, reg); - reg_symbol.info.sampler.range = sampler->range; + const struct vkd3d_shader_register reg = + { + .type = VKD3DSPR_SAMPLER, + .idx[0].offset = register_id, + .idx_count = 1, + }; + + vkd3d_symbol_make_sampler(&reg_symbol, &reg); + reg_symbol.info.sampler.range = *range; spirv_compiler_put_symbol(compiler, &reg_symbol);
- if (spirv_compiler_has_combined_sampler(compiler, NULL, sampler)) + if (spirv_compiler_has_combined_sampler_for_sampler(compiler, range)) return;
type_id = vkd3d_spirv_get_op_type_sampler(builder); - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, - &sampler->range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, + range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info);
- vkd3d_symbol_make_register(&reg_symbol, reg); + vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); reg_symbol.descriptor_array = var_info.array_symbol; @@ -5624,13 +5725,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty } }
-static const struct vkd3d_shader_descriptor_info *spirv_compiler_get_descriptor_info( +static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range) { - const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; unsigned int register_last = (range->last == ~0u) ? range->first : range->last; - const struct vkd3d_shader_descriptor_info *d; + const struct vkd3d_shader_descriptor_info1 *d; unsigned int i;
for (i = 0; i < descriptor_info->descriptor_count; ++i) @@ -5650,7 +5751,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler bool raw_structured, uint32_t depth) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_descriptor_info *d; + const struct vkd3d_shader_descriptor_info1 *d; bool uav_read, uav_atomics; uint32_t sampled_type_id; SpvImageFormat format; @@ -5685,7 +5786,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi const struct vkd3d_shader_combined_resource_sampler *current; uint32_t image_type_id, type_id, ptr_type_id, var_id; enum vkd3d_shader_binding_flag resource_type_flag; - const struct vkd3d_shader_descriptor_info *d; + const struct vkd3d_shader_descriptor_info1 *d; struct vkd3d_symbol symbol; unsigned int i; bool depth; @@ -5761,20 +5862,30 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi }
static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, - enum vkd3d_data_type resource_data_type, unsigned int structure_stride, bool raw) + const struct vkd3d_shader_register_range *range, unsigned int register_id, + unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, + enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) { struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; SpvStorageClass storage_class = SpvStorageClassUniformConstant; uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; - const struct vkd3d_shader_register *reg = &resource->reg.reg; const struct vkd3d_spirv_resource_type *resource_type_info; enum vkd3d_shader_component_type sampled_type; struct vkd3d_symbol resource_symbol; - bool is_uav;
- is_uav = reg->type == VKD3DSPR_UAV; + struct vkd3d_shader_register reg = + { + .type = is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, + .idx[0].offset = register_id, + .idx_count = 1, + }; + + if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; + if (!(resource_type_info = spirv_compiler_enable_resource_type(compiler, resource_type, is_uav))) { @@ -5782,11 +5893,11 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp return; }
- sampled_type = vkd3d_component_type_from_data_type(resource_data_type); + sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type);
- if (spirv_compiler_has_combined_sampler(compiler, resource, NULL)) + if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) { - spirv_compiler_emit_combined_sampler_declarations(compiler, reg, &resource->range, + spirv_compiler_emit_combined_sampler_declarations(compiler, &reg, range, resource_type, sampled_type, structure_stride, raw, resource_type_info); return; } @@ -5809,19 +5920,18 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } else { - type_id = spirv_compiler_get_image_type_id(compiler, reg, &resource->range, + type_id = spirv_compiler_get_image_type_id(compiler, &reg, range, resource_type_info, sampled_type, structure_stride || raw, 0); }
- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, - &resource->range, resource_type, false, &var_info); + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, + range, resource_type, false, &var_info);
if (is_uav) { - const struct vkd3d_shader_descriptor_info *d; + const struct vkd3d_shader_descriptor_info1 *d;
- d = spirv_compiler_get_descriptor_info(compiler, - VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); + d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range);
if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); @@ -5853,15 +5963,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp type_id = struct_id; }
- counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, - &resource->range, resource_type, true, &counter_var_info); + counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, + type_id, &reg, range, resource_type, true, &counter_var_info); } }
- vkd3d_symbol_make_resource(&resource_symbol, reg); + vkd3d_symbol_make_resource(&resource_symbol, &reg); resource_symbol.id = var_id; resource_symbol.descriptor_array = var_info.array_symbol; - resource_symbol.info.resource.range = resource->range; + resource_symbol.info.resource.range = *range; resource_symbol.info.resource.sampled_type = sampled_type; resource_symbol.info.resource.type_id = type_id; resource_symbol.info.resource.resource_type_info = resource_type_info; @@ -5874,58 +5984,6 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp spirv_compiler_put_symbol(compiler, &resource_symbol); }
-static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; - enum vkd3d_shader_resource_type resource_type = semantic->resource_type; - uint32_t flags = instruction->flags; - - /* We don't distinguish between APPEND and COUNTER UAVs. */ - flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; - if (flags) - FIXME("Unhandled UAV flags %#x.\n", flags); - - if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1) - resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; - else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1) - resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; - - spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, - resource_type, semantic->resource_data_type[0], 0, false); -} - -static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_raw_resource *resource = &instruction->declaration.raw_resource; - uint32_t flags = instruction->flags; - - /* We don't distinguish between APPEND and COUNTER UAVs. */ - flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; - if (flags) - FIXME("Unhandled UAV flags %#x.\n", flags); - - spirv_compiler_emit_resource_declaration(compiler, &resource->resource, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, 0, true); -} - -static void spirv_compiler_emit_dcl_resource_structured(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_structured_resource *resource = &instruction->declaration.structured_resource; - unsigned int stride = resource->byte_stride; - uint32_t flags = instruction->flags; - - /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ - flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; - if (flags) - FIXME("Unhandled UAV flags %#x.\n", flags); - - spirv_compiler_emit_resource_declaration(compiler, &resource->resource, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, stride / 4, false); -} - static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) { @@ -6236,9 +6294,6 @@ static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler)
vkd3d_spirv_build_op_function_end(builder);
- compiler->temp_id = 0; - compiler->temp_count = 0; - if (is_in_control_point_phase(compiler)) { if (compiler->epilogue_function_id) @@ -6640,7 +6695,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, uint32_t components[VKD3D_VEC4_SIZE]; unsigned int i, component_count;
- if (src->reg.type == VKD3DSPR_IMMCONST || src->reg.type == VKD3DSPR_IMMCONST64 || dst->modifiers || src->modifiers) + if (register_is_constant_or_undef(&src->reg) || dst->modifiers || src->modifiers) goto general_implementation;
spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); @@ -7398,7 +7453,13 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c assert(compiler->control_flow_depth); assert(cf_info->current_block == VKD3D_BLOCK_SWITCH);
- assert(src->swizzle == VKD3D_SHADER_NO_SWIZZLE && src->reg.type == VKD3DSPR_IMMCONST); + if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) + { + WARN("Unexpected src swizzle %#x.\n", src->swizzle); + spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE, + "The swizzle for a switch case value is not scalar."); + } + assert(src->reg.type == VKD3DSPR_IMMCONST); value = *src->reg.u.immconst_uint;
if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.case_blocks, &cf_info->u.switch_.case_blocks_size, @@ -9103,33 +9164,12 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_GLOBAL_FLAGS: spirv_compiler_emit_dcl_global_flags(compiler, instruction); break; - case VKD3DSIH_DCL_TEMPS: - spirv_compiler_emit_dcl_temps(compiler, instruction); - break; case VKD3DSIH_DCL_INDEXABLE_TEMP: spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); break; - case VKD3DSIH_DCL_CONSTANT_BUFFER: - spirv_compiler_emit_dcl_constant_buffer(compiler, instruction); - break; case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: spirv_compiler_emit_dcl_immediate_constant_buffer(compiler, instruction); break; - case VKD3DSIH_DCL_SAMPLER: - spirv_compiler_emit_dcl_sampler(compiler, instruction); - break; - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_UAV_TYPED: - spirv_compiler_emit_dcl_resource(compiler, instruction); - break; - case VKD3DSIH_DCL_RESOURCE_RAW: - case VKD3DSIH_DCL_UAV_RAW: - spirv_compiler_emit_dcl_resource_raw(compiler, instruction); - break; - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - case VKD3DSIH_DCL_UAV_STRUCTURED: - spirv_compiler_emit_dcl_resource_structured(compiler, instruction); - break; case VKD3DSIH_DCL_TGSM_RAW: spirv_compiler_emit_dcl_tgsm_raw(compiler, instruction); break; @@ -9425,7 +9465,16 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CUT_STREAM: spirv_compiler_emit_cut_stream(compiler, instruction); break; + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_CONSTANT_BUFFER: case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_UAV_RAW: + case VKD3DSIH_DCL_UAV_STRUCTURED: + case VKD3DSIH_DCL_UAV_TYPED: case VKD3DSIH_HS_DECLS: case VKD3DSIH_NOP: /* nothing to do */ @@ -9437,6 +9486,50 @@ static int spirv_compiler_handle_instruction(struct 
spirv_compiler *compiler, return ret; }
+static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) +{ + unsigned int i; + + for (i = 0; i < compiler->scan_descriptor_info->descriptor_count; ++i) + { + const struct vkd3d_shader_descriptor_info1 *descriptor = &compiler->scan_descriptor_info->descriptors[i]; + struct vkd3d_shader_register_range range; + + range.first = descriptor->register_index; + if (descriptor->count == ~0u) + range.last = ~0u; + else + range.last = descriptor->register_index + descriptor->count - 1; + range.space = descriptor->register_space; + + switch (descriptor->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, + descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, + descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, + descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, + descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); + break; + + default: + vkd3d_unreachable(); + } + } +} + static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, struct vkd3d_shader_code *spirv) @@ -9444,28 +9537,31 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; const struct 
vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; struct vkd3d_shader_instruction_array instructions; enum vkd3d_result result = VKD3D_OK; unsigned int i;
+ if (parser->shader_desc.temp_count) + spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); + + spirv_compiler_emit_descriptor_declarations(compiler); + compiler->location.column = 0; compiler->location.line = 1;
+ if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) + return result; + instructions = parser->instructions; memset(&parser->instructions, 0, sizeof(parser->instructions));
- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL - && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) - { - result = instruction_array_normalise_hull_shader_control_point_io(&instructions, - &compiler->input_signature); - } - if (result >= 0) - result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, - &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); - - if (result >= 0 && TRACE_ON()) - vkd3d_shader_trace(&instructions, &parser->shader_version); + compiler->input_signature = shader_desc->input_signature; + compiler->output_signature = shader_desc->output_signature; + compiler->patch_constant_signature = shader_desc->patch_constant_signature; + memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); + memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); + memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature));
if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) spirv_compiler_emit_shader_signature_outputs(compiler); @@ -9541,7 +9637,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, }
int spirv_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index d066b13ee4e..7949be150bf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -430,6 +430,8 @@ enum vkd3d_sm4_register_type VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, + + VKD3D_SM4_REGISTER_TYPE_COUNT, };
enum vkd3d_sm4_extended_operand_type @@ -505,7 +507,7 @@ enum vkd3d_sm4_input_primitive_type
enum vkd3d_sm4_swizzle_type { - VKD3D_SM4_SWIZZLE_NONE = 0x0, + VKD3D_SM4_SWIZZLE_NONE = 0x0, /* swizzle bitfield contains a mask */ VKD3D_SM4_SWIZZLE_VEC4 = 0x1, VKD3D_SM4_SWIZZLE_SCALAR = 0x2, }; @@ -571,6 +573,12 @@ struct sm4_index_range_array struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; };
+struct vkd3d_sm4_lookup_tables +{ + const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; + const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; +}; + struct vkd3d_shader_sm4_parser { const uint32_t *start, *end, *ptr; @@ -587,6 +595,8 @@ struct vkd3d_shader_sm4_parser struct sm4_index_range_array output_index_ranges; struct sm4_index_range_array patch_constant_index_ranges;
+ struct vkd3d_sm4_lookup_tables lookup; + struct vkd3d_shader_parser p; };
@@ -697,6 +707,19 @@ static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; }
+static void shader_sm4_read_case_condition(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, + (struct vkd3d_shader_src_param *)&ins->src[0]); + if (ins->src[0].reg.type != VKD3DSPR_IMMCONST) + { + FIXME("Switch case value is not a 32-bit constant.\n"); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE, + "Switch case value is not a 32-bit immediate constant register."); + } +} + static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) { @@ -989,6 +1012,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) { ins->declaration.count = *tokens; + if (opcode == VKD3D_SM4_OP_DCL_TEMPS) + priv->p.shader_desc.temp_count = max(priv->p.shader_desc.temp_count, *tokens); }
static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1203,7 +1228,8 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, + {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", + shader_sm4_read_case_condition}, {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", shader_sm4_read_conditional_op}, @@ -1466,50 +1492,10 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, };
-static const enum vkd3d_shader_register_type register_type_table[] = -{ - /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, - /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, - /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, - /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, - /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, - /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, - /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, - /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, - /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, - /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, - /* UNKNOWN */ ~0u, - /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, - /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, - /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, - /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, - /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, - /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, - /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, - /* UNKNOWN */ ~0u, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, - /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, - /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, - /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, - /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, - /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, - /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, - /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, - /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, - /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, - /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, - /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, - /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, - /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL 
*/ VKD3DSPR_DEPTHOUTGE, - /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, - /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, +struct vkd3d_sm4_register_type_info +{ + enum vkd3d_sm4_register_type sm4_type; + enum vkd3d_shader_register_type vkd3d_type; };
static const enum vkd3d_shader_register_precision register_precision_table[] = @@ -1522,18 +1508,104 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, };
+struct tpf_writer +{ + struct hlsl_ctx *ctx; + struct vkd3d_bytecode_buffer *buffer; + struct vkd3d_sm4_lookup_tables lookup; +}; + static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) { unsigned int i;
for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) { - if (opcode == opcode_table[i].opcode) return &opcode_table[i]; + if (opcode == opcode_table[i].opcode) + return &opcode_table[i]; }
return NULL; }
+static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +{ + const struct vkd3d_sm4_register_type_info *info; + unsigned int i; + + static const struct vkd3d_sm4_register_type_info register_type_table[] = + { + {VKD3D_SM4_RT_TEMP, VKD3DSPR_TEMP}, + {VKD3D_SM4_RT_INPUT, VKD3DSPR_INPUT}, + {VKD3D_SM4_RT_OUTPUT, VKD3DSPR_OUTPUT}, + {VKD3D_SM4_RT_INDEXABLE_TEMP, VKD3DSPR_IDXTEMP}, + {VKD3D_SM4_RT_IMMCONST, VKD3DSPR_IMMCONST}, + {VKD3D_SM4_RT_IMMCONST64, VKD3DSPR_IMMCONST64}, + {VKD3D_SM4_RT_SAMPLER, VKD3DSPR_SAMPLER}, + {VKD3D_SM4_RT_RESOURCE, VKD3DSPR_RESOURCE}, + {VKD3D_SM4_RT_CONSTBUFFER, VKD3DSPR_CONSTBUFFER}, + {VKD3D_SM4_RT_IMMCONSTBUFFER, VKD3DSPR_IMMCONSTBUFFER}, + {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID}, + {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT}, + {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL}, + {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER}, + {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK}, + {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM}, + {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY}, + {VKD3D_SM5_RT_FUNCTION_POINTER, VKD3DSPR_FUNCTIONPOINTER}, + {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID, VKD3DSPR_OUTPOINTID}, + {VKD3D_SM5_RT_FORK_INSTANCE_ID, VKD3DSPR_FORKINSTID}, + {VKD3D_SM5_RT_JOIN_INSTANCE_ID, VKD3DSPR_JOININSTID}, + {VKD3D_SM5_RT_INPUT_CONTROL_POINT, VKD3DSPR_INCONTROLPOINT}, + {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT, VKD3DSPR_OUTCONTROLPOINT}, + {VKD3D_SM5_RT_PATCH_CONSTANT_DATA, VKD3DSPR_PATCHCONST}, + {VKD3D_SM5_RT_DOMAIN_LOCATION, VKD3DSPR_TESSCOORD}, + {VKD3D_SM5_RT_UAV, VKD3DSPR_UAV}, + {VKD3D_SM5_RT_SHARED_MEMORY, VKD3DSPR_GROUPSHAREDMEM}, + {VKD3D_SM5_RT_THREAD_ID, VKD3DSPR_THREADID}, + {VKD3D_SM5_RT_THREAD_GROUP_ID, VKD3DSPR_THREADGROUPID}, + {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID}, + {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE}, + {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX}, + {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID}, + {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE}, + 
{VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE}, + {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF}, + }; + + memset(lookup, 0, sizeof(*lookup)); + + for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) + { + info = &register_type_table[i]; + lookup->register_type_info_from_sm4[info->sm4_type] = info; + lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; + } +} + +static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + tpf->ctx = ctx; + tpf->buffer = buffer; + init_sm4_lookup_tables(&tpf->lookup); +} + +static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) +{ + if (sm4_type >= VKD3D_SM4_REGISTER_TYPE_COUNT) + return NULL; + return lookup->register_type_info_from_sm4[sm4_type]; +} + +static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_type( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) +{ + if (vkd3d_type >= VKD3DSPR_COUNT) + return NULL; + return lookup->register_type_info_from_vkd3d[vkd3d_type]; +} + static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) { switch (sm4->p.shader_version.type) @@ -1640,6 +1712,7 @@ static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_typ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) { + const struct vkd3d_sm4_register_type_info *register_type_info; enum vkd3d_sm4_register_precision precision; enum vkd3d_sm4_register_type register_type; enum vkd3d_sm4_extended_operand_type type; @@ -1654,15 +1727,15 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui token = *(*ptr)++;
register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; - if (register_type >= ARRAY_SIZE(register_type_table) - || register_type_table[register_type] == VKD3DSPR_INVALID) + register_type_info = get_info_from_sm4_register_type(&priv->lookup, register_type); + if (!register_type_info) { FIXME("Unhandled register type %#x.\n", register_type); param->type = VKD3DSPR_TEMP; } else { - param->type = register_type_table[register_type]; + param->type = register_type_info->vkd3d_type; } param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; param->non_uniform = false; @@ -1953,6 +2026,7 @@ static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_pa static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) { + unsigned int dimension, mask; DWORD token;
if (*ptr >= end) @@ -1968,37 +2042,63 @@ static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, cons return false; }
- if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) + switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) { - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - } - else - { - enum vkd3d_sm4_swizzle_type swizzle_type = - (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + case VKD3D_SM4_DIMENSION_NONE: + case VKD3D_SM4_DIMENSION_SCALAR: + src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + break;
- switch (swizzle_type) + case VKD3D_SM4_DIMENSION_VEC4: { - case VKD3D_SM4_SWIZZLE_NONE: - if (shader_sm4_is_scalar_register(&src_param->reg)) - src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - else + enum vkd3d_sm4_swizzle_type swizzle_type = + (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + + switch (swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - break;
- case VKD3D_SM4_SWIZZLE_SCALAR: - src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; - src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; - break; + mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + /* Mask seems only to be used for vec4 constants and is always zero. */ + if (!register_is_constant(&src_param->reg)) + { + FIXME("Source mask %#x is not for a constant.\n", mask); + vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, + "Unhandled mask %#x for a non-constant source register.", mask); + } + else if (mask) + { + FIXME("Unhandled mask %#x.\n", mask); + vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, + "Unhandled source register mask %#x.", mask); + }
- case VKD3D_SM4_SWIZZLE_VEC4: - src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); - break; + break;
- default: - FIXME("Unhandled swizzle type %#x.\n", swizzle_type); - break; + case VKD3D_SM4_SWIZZLE_SCALAR: + src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; + src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); + break; + + default: + FIXME("Unhandled swizzle type %#x.\n", swizzle_type); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, + "Source register swizzle type %#x is invalid.", swizzle_type); + break; + } + break; } + + default: + FIXME("Unhandled dimension %#x.\n", dimension); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, + "Source register dimension %#x is invalid.", dimension); + break; }
if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv, @@ -2011,7 +2111,9 @@ static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, cons static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) { + enum vkd3d_sm4_swizzle_type swizzle_type; enum vkd3d_shader_src_modifier modifier; + unsigned int dimension, swizzle; DWORD token;
if (*ptr >= end) @@ -2033,10 +2135,53 @@ static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, cons return false; }
- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) + { + case VKD3D_SM4_DIMENSION_NONE: + dst_param->write_mask = 0; + break; + + case VKD3D_SM4_DIMENSION_SCALAR: + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + break; + + case VKD3D_SM4_DIMENSION_VEC4: + swizzle_type = (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + switch (swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: + dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); + if (swizzle != VKD3D_SHADER_NO_SWIZZLE) + { + FIXME("Unhandled swizzle %#x.\n", swizzle); + vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE, + "Unhandled destination register swizzle %#x.", swizzle); + } + dst_param->write_mask = VKD3DSP_WRITEMASK_ALL; + break; + + default: + FIXME("Unhandled swizzle type %#x.\n", swizzle_type); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, + "Destination register swizzle type %#x is invalid.", swizzle_type); + break; + } + break; + + default: + FIXME("Unhandled dimension %#x.\n", dimension); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, + "Destination register dimension %#x is invalid.", dimension); + break; + } + if (data_type == VKD3D_DATA_DOUBLE) dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); - /* Scalar registers are declared with no write mask in shader bytecode. */ + /* Some scalar registers are declared with no write mask in shader bytecode. 
*/ if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) dst_param->write_mask = VKD3DSP_WRITEMASK_0; dst_param->modifiers = 0; @@ -2362,6 +2507,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t sm4->output_map[e->register_index] = e->semantic_index; }
+ init_sm4_lookup_tables(&sm4->lookup); + return true; }
@@ -2442,6 +2589,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi }
shader_desc = &sm4->p.shader_desc; + shader_desc->is_dxil = false; if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, shader_desc)) < 0) { @@ -2499,7 +2647,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; }
-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); +static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block);
static bool type_is_integer(const struct hlsl_type *type) { @@ -2516,7 +2664,7 @@ static bool type_is_integer(const struct hlsl_type *type) }
bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) + bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) { unsigned int i;
@@ -2526,24 +2674,24 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem bool output; enum vkd3d_shader_type shader_type; enum vkd3d_sm4_swizzle_type swizzle_type; - enum vkd3d_sm4_register_type type; + enum vkd3d_shader_register_type type; bool has_idx; } register_table[] = { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADID, false}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADGROUPID, false}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_LOCALTHREADID, false},
- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3DSPR_PRIMID, false},
/* Put sv_target in this table, instead of letting it fall through to * default varying allocation, so that the register index matches the * usage index. */ - {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, + {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, };
for (i = 0; i < ARRAY_SIZE(register_table); ++i) @@ -2552,7 +2700,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem && output == register_table[i].output && ctx->profile->type == register_table[i].shader_type) { - *type = register_table[i].type; + if (type) + *type = register_table[i].type; if (swizzle_type) *swizzle_type = register_table[i].swizzle_type; *has_idx = register_table[i].has_idx; @@ -2624,7 +2773,8 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant return true; }
-static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) +static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + uint32_t tag, struct vkd3d_bytecode_buffer *buffer) { /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN * sections to be aligned. Without this, the sections themselves will be @@ -2632,6 +2782,9 @@ static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_byt size_t size = bytecode_align(buffer);
dxbc_writer_add_section(dxbc, tag, buffer->data, size); + + if (buffer->status < 0) + ctx->result = buffer->status; }
static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) @@ -2649,7 +2802,6 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; - enum vkd3d_sm4_register_type type; uint32_t usage_idx, reg_idx; D3D_NAME usage; bool has_idx; @@ -2663,14 +2815,13 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, continue; usage_idx = var->semantic.index;
- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, NULL, &has_idx)) { reg_idx = has_idx ? var->semantic.index : ~0u; } else { assert(var->regs[HLSL_REGSET_NUMERIC].allocated); - type = VKD3D_SM4_RT_INPUT; reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; }
@@ -2739,7 +2890,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc,
set_u32(&buffer, count_position, i);
- add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); + add_section(ctx, dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); }
static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) @@ -2827,6 +2978,22 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) return D3D_SVT_VERTEXSHADER; case HLSL_TYPE_VOID: return D3D_SVT_VOID; + case HLSL_TYPE_UAV: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SVT_RWTEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SVT_RWTEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SVT_RWTEXTURE3D; + case HLSL_SAMPLER_DIM_1DARRAY: + return D3D_SVT_RWTEXTURE1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return D3D_SVT_RWTEXTURE2DARRAY; + default: + vkd3d_unreachable(); + } default: vkd3d_unreachable(); } @@ -2967,47 +3134,154 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ } }
+struct extern_resource +{ + /* var is only not NULL if this resource is a whole variable, so it may be responsible for more + * than one component. */ + const struct hlsl_ir_var *var; + + char *name; + struct hlsl_type *data_type; + bool is_user_packed; + + enum hlsl_regset regset; + unsigned int id, bind_count; +}; + static int sm4_compare_extern_resources(const void *a, const void *b) { - const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; - const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; - enum hlsl_regset aa_regset, bb_regset; + const struct extern_resource *aa = (const struct extern_resource *)a; + const struct extern_resource *bb = (const struct extern_resource *)b; + int r;
- aa_regset = hlsl_type_get_regset(aa->data_type); - bb_regset = hlsl_type_get_regset(bb->data_type); + if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) + return r;
- if (aa_regset != bb_regset) - return aa_regset - bb_regset; + return vkd3d_u32_compare(aa->id, bb->id); +} + +static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +{ + unsigned int i;
- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; + for (i = 0; i < count; ++i) + vkd3d_free(extern_resources[i].name); + vkd3d_free(extern_resources); +} + +static const char *string_skip_tag(const char *string) +{ + if (!strncmp(string, "<resource>", strlen("<resource>"))) + return string + strlen("<resource>"); + return string; }
-static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { - const struct hlsl_ir_var **extern_resources = NULL; + bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; + struct extern_resource *extern_resources = NULL; const struct hlsl_ir_var *var; enum hlsl_regset regset; size_t capacity = 0; + char *name;
*count = 0;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!hlsl_type_is_resource(var->data_type)) - continue; - regset = hlsl_type_get_regset(var->data_type); - if (!var->regs[regset].allocated) - continue; - - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) + if (separate_components) { - *count = 0; - return NULL; + unsigned int component_count = hlsl_type_component_count(var->data_type); + unsigned int k, regset_offset; + + for (k = 0; k < component_count; ++k) + { + struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); + struct vkd3d_string_buffer *name_buffer; + + if (!hlsl_type_is_resource(component_type)) + continue; + + regset = hlsl_type_get_regset(component_type); + regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); + + if (regset_offset > var->regs[regset].allocation_size) + continue; + + if (var->objects_usage[regset][regset_offset].used) + { + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + hlsl_release_string_buffer(ctx, name_buffer); + return NULL; + } + hlsl_release_string_buffer(ctx, name_buffer); + + extern_resources[*count].var = NULL; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = component_type; + extern_resources[*count].is_user_packed = false; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id + regset_offset; + extern_resources[*count].bind_count = 1; + + 
++*count; + } + } } + else + { + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + if (!var->regs[regset].allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + }
- extern_resources[*count] = var; - ++*count; + if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = var; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = var->data_type; + extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].bind_count = var->bind_count[regset]; + + ++*count; + } }
qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); @@ -3020,8 +3294,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) size_t cbuffers_offset, resources_offset, creator_offset, string_offset; size_t cbuffer_position, resource_position, creator_position; const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var;
@@ -3075,18 +3349,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { - enum hlsl_regset regset; + const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0;
- var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (var->reg_reservation.reg_type) + if (resource->is_user_packed) flags |= D3D_SIF_USERPACKED;
put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(var->data_type)); - if (regset == HLSL_REGSET_SAMPLERS) + put_u32(&buffer, sm4_resource_type(resource->data_type)); + if (resource->regset == HLSL_REGSET_SAMPLERS) { put_u32(&buffer, 0); put_u32(&buffer, 0); @@ -3094,15 +3365,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) } else { - unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx;
- put_u32(&buffer, sm4_resource_format(var->data_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); + put_u32(&buffer, sm4_resource_format(resource->data_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } - put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, var->regs[regset].bind_count); + put_u32(&buffer, resource->id); + put_u32(&buffer, resource->bind_count); put_u32(&buffer, flags); }
@@ -3128,9 +3399,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { - var = extern_resources[i]; + const struct extern_resource *resource = &extern_resources[i];
- string_offset = put_string(&buffer, var->name); + string_offset = put_string(&buffer, resource->name); set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); }
@@ -3234,9 +3505,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); set_u32(&buffer, creator_position, creator_offset);
- add_section(dxbc, TAG_RDEF, &buffer); + add_section(ctx, dxbc, TAG_RDEF, &buffer);
- vkd3d_free(extern_resources); + sm4_free_extern_resources(extern_resources, extern_resources_count); }
static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) @@ -3308,8 +3579,8 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod
struct sm4_register { - enum vkd3d_sm4_register_type type; - uint32_t idx[2]; + enum vkd3d_shader_register_type type; + struct vkd3d_shader_register_index idx[2]; unsigned int idx_count; enum vkd3d_sm4_dimension dim; uint32_t immconst_uint[4]; @@ -3346,8 +3617,9 @@ struct sm4_instruction
static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, - const struct hlsl_deref *deref, const struct hlsl_type *data_type) + const struct hlsl_deref *deref) { + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); const struct hlsl_ir_var *var = deref->var;
if (var->is_uniform) @@ -3356,37 +3628,37 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (regset == HLSL_REGSET_TEXTURES) { - reg->type = VKD3D_SM4_RT_RESOURCE; + reg->type = VKD3DSPR_RESOURCE; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); - assert(deref->offset_regset == HLSL_REGSET_TEXTURES); + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_TEXTURES); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } else if (regset == HLSL_REGSET_UAVS) { - reg->type = VKD3D_SM5_RT_UAV; + reg->type = VKD3DSPR_UAV; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); - assert(deref->offset_regset == HLSL_REGSET_UAVS); + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_UAVS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } else if (regset == HLSL_REGSET_SAMPLERS) { - reg->type = VKD3D_SM4_RT_SAMPLER; + reg->type = VKD3DSPR_SAMPLER; reg->dim = VKD3D_SM4_DIMENSION_NONE; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); - assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_SAMPLERS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -3395,12 +3667,12 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
assert(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3D_SM4_RT_CONSTBUFFER; + reg->type = VKD3DSPR_CONSTBUFFER; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->buffer->reg.id; - reg->idx[1] = offset / 4; + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = offset / 4; reg->idx_count = 2; *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); } @@ -3415,7 +3687,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (has_idx) { - reg->idx[0] = var->semantic.index + offset / 4; + reg->idx[0].offset = var->semantic.index + offset / 4; reg->idx_count = 1; }
@@ -3427,11 +3699,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_INPUT; + reg->type = VKD3DSPR_INPUT; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = hlsl_reg.id; + reg->idx[0].offset = hlsl_reg.id; reg->idx_count = 1; *writemask = hlsl_reg.writemask; } @@ -3446,11 +3718,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (has_idx) { - reg->idx[0] = var->semantic.index + offset / 4; + reg->idx[0].offset = var->semantic.index + offset / 4; reg->idx_count = 1; }
- if (reg->type == VKD3D_SM4_RT_DEPTHOUT) + if (reg->type == VKD3DSPR_DEPTHOUT) reg->dim = VKD3D_SM4_DIMENSION_SCALAR; else reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -3461,9 +3733,9 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_OUTPUT; + reg->type = VKD3DSPR_OUTPUT; reg->dim = VKD3D_SM4_DIMENSION_VEC4; - reg->idx[0] = hlsl_reg.id; + reg->idx[0].offset = hlsl_reg.id; reg->idx_count = 1; *writemask = hlsl_reg.writemask; } @@ -3473,22 +3745,22 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_TEMP; + reg->type = VKD3DSPR_TEMP; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = hlsl_reg.id; + reg->idx[0].offset = hlsl_reg.id; reg->idx_count = 1; *writemask = hlsl_reg.writemask; } }
static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, - const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) + const struct hlsl_deref *deref, unsigned int map_writemask) { unsigned int writemask;
- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); + sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); } @@ -3497,10 +3769,10 @@ static void sm4_register_from_node(struct sm4_register *reg, unsigned int *write enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) { assert(instr->reg.allocated); - reg->type = VKD3D_SM4_RT_TEMP; + reg->type = VKD3DSPR_TEMP; reg->dim = VKD3D_SM4_DIMENSION_VEC4; *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = instr->reg.id; + reg->idx[0].offset = instr->reg.id; reg->idx_count = 1; *writemask = instr->reg.writemask; } @@ -3516,7 +3788,7 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) { src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - src->reg.type = VKD3D_SM4_RT_IMMCONST; + src->reg.type = VKD3DSPR_IMMCONST; if (width == 1) { src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; @@ -3529,8 +3801,10 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; for (i = 0; i < 4; ++i) { - if (map_writemask & (1u << i)) + if ((map_writemask & (1u << i)) && (j < width)) src->reg.immconst_uint[i] = value->u[j++].u; + else + src->reg.immconst_uint[i] = 0; } } } @@ -3553,17 +3827,100 @@ static void sm4_src_from_node(struct sm4_src_register *src, src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); }
-static uint32_t sm4_encode_register(const struct sm4_register *reg) +static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct sm4_dst_register *dst) { - return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) - | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) - | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); + const struct vkd3d_sm4_register_type_info *register_type_info; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t sm4_reg_type, reg_dim; + uint32_t token = 0; + unsigned int j; + + register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); + if (!register_type_info) + { + FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); + sm4_reg_type = VKD3D_SM4_RT_TEMP; + } + else + { + sm4_reg_type = register_type_info->sm4_type; + } + + reg_dim = dst->reg.dim; + + token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; + token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; + token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) + token |= dst->writemask << VKD3D_SM4_WRITEMASK_SHIFT; + put_u32(buffer, token); + + for (j = 0; j < dst->reg.idx_count; ++j) + { + put_u32(buffer, dst->reg.idx[j].offset); + assert(!dst->reg.idx[j].rel_addr); + } +} + +static void sm4_write_src_register(const struct tpf_writer *tpf, const struct sm4_src_register *src) +{ + const struct vkd3d_sm4_register_type_info *register_type_info; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t sm4_reg_type, reg_dim; + uint32_t token = 0; + unsigned int j; + + register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); + if (!register_type_info) + { + FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); + sm4_reg_type = VKD3D_SM4_RT_TEMP; + } + else + { + sm4_reg_type = register_type_info->sm4_type; + } + + reg_dim = src->reg.dim; + + token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; + token |= src->reg.idx_count << 
VKD3D_SM4_REGISTER_ORDER_SHIFT; + token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) + { + token |= (uint32_t)src->swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + token |= src->swizzle << VKD3D_SM4_SWIZZLE_SHIFT; + } + if (src->reg.mod) + token |= VKD3D_SM4_EXTENDED_OPERAND; + put_u32(buffer, token); + + if (src->reg.mod) + put_u32(buffer, (src->reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); + + for (j = 0; j < src->reg.idx_count; ++j) + { + put_u32(buffer, src->reg.idx[j].offset); + assert(!src->reg.idx[j].rel_addr); + } + + if (src->reg.type == VKD3DSPR_IMMCONST) + { + put_u32(buffer, src->reg.immconst_uint[0]); + if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) + { + put_u32(buffer, src->reg.immconst_uint[1]); + put_u32(buffer, src->reg.immconst_uint[2]); + put_u32(buffer, src->reg.immconst_uint[3]); + } + } }
static uint32_t sm4_register_order(const struct sm4_register *reg) { uint32_t order = 1; - if (reg->type == VKD3D_SM4_RT_IMMCONST) + if (reg->type == VKD3DSPR_IMMCONST) order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; order += reg->idx_count; if (reg->mod) @@ -3571,8 +3928,9 @@ static uint32_t sm4_register_order(const struct sm4_register *reg) return order; }
-static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) +static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) { + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = instr->opcode; unsigned int size = 1, i, j;
@@ -3600,43 +3958,10 @@ static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const st }
for (i = 0; i < instr->dst_count; ++i) - { - token = sm4_encode_register(&instr->dsts[i].reg); - if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; - put_u32(buffer, token); - - for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) - put_u32(buffer, instr->dsts[i].reg.idx[j]); - } + sm4_write_dst_register(tpf, &instr->dsts[i]);
for (i = 0; i < instr->src_count; ++i) - { - token = sm4_encode_register(&instr->srcs[i].reg); - token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; - if (instr->srcs[i].reg.mod) - token |= VKD3D_SM4_EXTENDED_OPERAND; - put_u32(buffer, token); - - if (instr->srcs[i].reg.mod) - put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) - | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); - - for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) - put_u32(buffer, instr->srcs[i].reg.idx[j]); - - if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); - if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); - } - } - } + sm4_write_src_register(tpf, &instr->srcs[i]);
if (instr->byte_stride) put_u32(buffer, instr->byte_stride); @@ -3672,67 +3997,75 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, return true; }
-static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) +static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) { const struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER,
.srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, - .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, + .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, + .srcs[0].reg.idx[0].offset = cbuffer->reg.id, + .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, .srcs[0].reg.idx_count = 2, .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), .src_count = 1, }; - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) { - unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; + struct hlsl_type *component_type; + unsigned int i; struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.type = VKD3DSPR_SAMPLER, .dsts[0].reg.idx_count = 1, .dst_count = 1, };
- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT;
- for (i = 0; i < count; ++i) + assert(resource->regset == HLSL_REGSET_SAMPLERS); + + for (i = 0; i < resource->bind_count; ++i) { - if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) continue;
- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; - write_sm4_instruction(buffer, &instr); + instr.dsts[0].reg.idx[0].offset = resource->id + i; + write_sm4_instruction(tpf, &instr); } }
-static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool uav) +static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, + bool uav) { enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - unsigned int i, count = var->data_type->reg_size[regset]; struct hlsl_type *component_type; struct sm4_instruction instr; + unsigned int i;
- component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + assert(resource->regset == regset);
- for (i = 0; i < count; ++i) + component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + + for (i = 0; i < resource->bind_count; ++i) { - if (!var->objects_usage[regset][i].used) + if (resource->var && !resource->var->objects_usage[regset][i].used) continue;
instr = (struct sm4_instruction) { - .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.type = uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, + .dsts[0].reg.idx[0].offset = resource->id + i, .dsts[0].reg.idx_count = 1, .dst_count = 1,
@@ -3742,11 +4075,11 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
if (uav) { - switch (var->data_type->sampler_dim) + switch (resource->data_type->sampler_dim) { case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; - instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; + instr.byte_stride = resource->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; break; default: instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; @@ -3765,13 +4098,13 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); } }
-static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) { - const struct hlsl_profile_info *profile = ctx->profile; + const struct hlsl_profile_info *profile = tpf->ctx->profile; const bool output = var->is_output_semantic; D3D_NAME usage; bool has_idx; @@ -3782,11 +4115,11 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b .dst_count = 1, };
- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) { if (has_idx) { - instr.dsts[0].reg.idx[0] = var->semantic.index; + instr.dsts[0].reg.idx[0].offset = var->semantic.index; instr.dsts[0].reg.idx_count = 1; } else @@ -3797,16 +4130,16 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b } else { - instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; - instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; instr.dsts[0].reg.idx_count = 1; instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; }
- if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) + if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR;
- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); if (usage == ~0u) usage = D3D_NAME_UNDEFINED;
@@ -3866,10 +4199,10 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b break; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) +static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) { struct sm4_instruction instr = { @@ -3879,33 +4212,35 @@ static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t t .idx_count = 1, };
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) +static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) { struct sm4_instruction instr = { .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,
- .idx = {thread_count[0], thread_count[1], thread_count[2]}, + .idx[0] = thread_count[0], + .idx[1] = thread_count[1], + .idx[2] = thread_count[2], .idx_count = 3, };
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) +static void write_sm4_ret(const struct tpf_writer *tpf) { struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_RET, };
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) { struct sm4_instruction instr; @@ -3920,12 +4255,11 @@ static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_ instr.srcs[0].reg.mod = src_mod; instr.src_count = 1;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src) +static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src) { struct sm4_instruction instr;
@@ -3935,7 +4269,7 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe assert(dst_idx < ARRAY_SIZE(instr.dsts)); sm4_dst_from_node(&instr.dsts[dst_idx], dst); assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; instr.dsts[1 - dst_idx].reg.idx_count = 0; instr.dst_count = 2; @@ -3943,10 +4277,10 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); instr.src_count = 1;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { struct sm4_instruction instr; @@ -3961,11 +4295,11 @@ static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
/* dp# instructions don't map the swizzle. */ -static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { struct sm4_instruction instr; @@ -3980,10 +4314,10 @@ static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum v sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, +static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { @@ -3995,7 +4329,7 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff assert(dst_idx < ARRAY_SIZE(instr.dsts)); sm4_dst_from_node(&instr.dsts[dst_idx], dst); assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; instr.dsts[1 - dst_idx].reg.idx_count = 0; instr.dst_count = 2; @@ -4004,15 +4338,15 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, +static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) { + const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); @@ -4029,7 +4363,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7."); return; } @@ -4052,7 +4386,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf
sm4_src_from_node(&instr.srcs[0], coords, coords_writemask);
- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask);
instr.src_count = 2;
@@ -4067,13 +4401,13 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf
memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->type = VKD3D_SM4_RT_IMMCONST; + reg->type = VKD3DSPR_IMMCONST; reg->dim = VKD3D_SM4_DIMENSION_SCALAR; reg->immconst_uint[0] = index->value.u[0].u; } - else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) + else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) { - hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); } else { @@ -4083,13 +4417,11 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf ++instr.src_count; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_resource_load *load) +static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) { - const struct hlsl_type *resource_type = load->resource.var->data_type; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node; const struct hlsl_deref *resource = &load->resource; @@ -4132,7 +4464,7 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7."); return; } @@ -4142,8 +4474,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); instr.src_count = 3;
if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD @@ -4165,7 +4497,52 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer ++instr.src_count; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); +} + +static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + + assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; + if (dst->data_type->base_type == HLSL_TYPE_UINT) + instr.opcode |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_deref(tpf->ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(tpf, &instr); +} + +static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + + assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_RESINFO; + if (dst->data_type->base_type == HLSL_TYPE_UINT) + instr.opcode |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + instr.src_count = 2; + + write_sm4_instruction(tpf, &instr); }
static bool type_is_float(const struct hlsl_type *type) @@ -4173,8 +4550,7 @@ static bool type_is_float(const struct hlsl_type *type) return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; }
-static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, +static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, const struct hlsl_ir_node *arg, uint32_t mask) { struct sm4_instruction instr; @@ -4187,16 +4563,15 @@ static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx,
sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; + instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; instr.srcs[1].reg.immconst_uint[0] = mask; instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_cast(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) { static const union { @@ -4218,23 +4593,23 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break;
case HLSL_TYPE_INT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); break;
case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); break;
case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); + write_sm4_cast_from_bool(tpf, expr, arg1, one.u); break;
case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); break;
default: @@ -4247,20 +4622,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break;
case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + write_sm4_cast_from_bool(tpf, expr, arg1, 1); break;
case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); break;
default: @@ -4273,20 +4648,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break;
case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + write_sm4_cast_from_bool(tpf, expr, arg1, 1); break;
case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); break;
default: @@ -4295,7 +4670,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, break;
case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); break;
case HLSL_TYPE_BOOL: @@ -4305,26 +4680,25 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, } }
-static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, + const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) { struct sm4_instruction instr;
memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_expr(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) { const struct hlsl_ir_node *arg1 = expr->operands[0].node; const struct hlsl_ir_node *arg2 = expr->operands[1].node; @@ -4333,7 +4707,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
assert(expr->node.reg.allocated);
- if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) + if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) return;
switch (expr->op) @@ -4342,161 +4716,181 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); } break;
case HLSL_OP1_BIT_NOT: assert(type_is_integer(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); break;
case HLSL_OP1_CAST: - write_sm4_cast(ctx, buffer, expr); + write_sm4_cast(tpf, expr); break;
case HLSL_OP1_COS: assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); break;
case HLSL_OP1_DSX: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); break;
case HLSL_OP1_DSY: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); break;
case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); break;
case HLSL_OP1_FLOOR: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); break;
case HLSL_OP1_FRACT: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); break;
case HLSL_OP1_LOG2: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); break;
case HLSL_OP1_LOGIC_NOT: assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); break;
case HLSL_OP1_NEG: switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); } break;
case HLSL_OP1_REINTERPRET: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break;
case HLSL_OP1_ROUND: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); break;
case HLSL_OP1_RSQ: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); break;
case HLSL_OP1_SAT: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), &expr->node, arg1, 0); break;
case HLSL_OP1_SIN: assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); break;
case HLSL_OP1_SQRT: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); break;
case HLSL_OP1_TRUNC: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); break;
case HLSL_OP2_ADD: switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); } break;
case HLSL_OP2_BIT_AND: assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); break;
case HLSL_OP2_BIT_OR: assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); break;
case HLSL_OP2_BIT_XOR: assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); break;
case HLSL_OP2_DIV: switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); break;
case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); + write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); } break;
@@ -4507,15 +4901,15 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (arg1->data_type->dimx) { case 4: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); + write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); break;
case 3: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); + write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); break;
case 2: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); + write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); break;
case 1: @@ -4525,7 +4919,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); } break;
@@ -4538,18 +4932,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (src_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); break;
case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between "%s" operands.", - debug_hlsl_type(ctx, src_type)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between "%s" operands.", + debug_hlsl_type(tpf->ctx, src_type)); break; } break; @@ -4564,21 +4958,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (src_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); break;
case HLSL_TYPE_BOOL: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between "%s" operands.", - debug_hlsl_type(ctx, src_type)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between "%s" operands.", + debug_hlsl_type(tpf->ctx, src_type)); break; } break; @@ -4593,21 +4987,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (src_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); break;
case HLSL_TYPE_BOOL: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between "%s" operands.", - debug_hlsl_type(ctx, src_type)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between "%s" operands.", + debug_hlsl_type(tpf->ctx, src_type)); break; } break; @@ -4615,37 +5009,37 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
case HLSL_OP2_LOGIC_AND: assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); break;
case HLSL_OP2_LOGIC_OR: assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); break;
case HLSL_OP2_LSHIFT: assert(type_is_integer(dst_type)); assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); break;
case HLSL_OP2_MAX: switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); break;
case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); } break;
@@ -4653,19 +5047,19 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); break;
case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); } break;
@@ -4673,11 +5067,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (dst_type->base_type) { case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); + write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); } break;
@@ -4685,18 +5079,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: /* Using IMUL instead of UMUL because we're taking the low * bits, and the native compiler generates IMUL. */ - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); } break;
@@ -4709,18 +5103,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (src_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); break;
case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", - debug_hlsl_type(ctx, src_type)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", + debug_hlsl_type(tpf->ctx, src_type)); break; } break; @@ -4729,18 +5123,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, case HLSL_OP2_RSHIFT: assert(type_is_integer(dst_type)); assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); }
- hlsl_release_string_buffer(ctx, dst_type_string); + hlsl_release_string_buffer(tpf->ctx, dst_type_string); }
-static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) +static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) { struct sm4_instruction instr = { @@ -4751,26 +5145,25 @@ static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf assert(iff->condition.node->data_type->dimx == 1);
sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr);
- write_sm4_block(ctx, buffer, &iff->then_block); + write_sm4_block(tpf, &iff->then_block);
if (!list_empty(&iff->else_block.instrs)) { instr.opcode = VKD3D_SM4_OP_ELSE; instr.src_count = 0; - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr);
- write_sm4_block(ctx, buffer, &iff->else_block); + write_sm4_block(tpf, &iff->else_block); }
instr.opcode = VKD3D_SM4_OP_ENDIF; instr.src_count = 0; - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_jump(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) +static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) { struct sm4_instruction instr = {0};
@@ -4780,19 +5173,13 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, instr.opcode = VKD3D_SM4_OP_BREAK; break;
- case HLSL_IR_JUMP_DISCARD: + case HLSL_IR_JUMP_DISCARD_NZ: { - struct sm4_register *reg = &instr.srcs[0].reg; - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ;
memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); - instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; instr.src_count = 1; - reg->type = VKD3D_SM4_RT_IMMCONST; - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - reg->immconst_uint[0] = ~0u; - + sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); break; }
@@ -4800,11 +5187,11 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, vkd3d_unreachable();
default: - hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); return; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
/* Does this variable's data come directly from the API user, rather than being @@ -4818,8 +5205,7 @@ static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *va return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; }
-static void write_sm4_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) +static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) { const struct hlsl_type *type = load->node.data_type; struct sm4_instruction instr; @@ -4830,7 +5216,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, instr.dst_count = 1;
assert(type->class <= HLSL_CLASS_LAST_NUMERIC); - if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) + if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) { struct hlsl_constant_value value;
@@ -4839,7 +5225,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx,
instr.opcode = VKD3D_SM4_OP_MOVC;
- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask);
memset(&value, 0xff, sizeof(value)); sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); @@ -4851,33 +5237,31 @@ static void write_sm4_load(struct hlsl_ctx *ctx, { instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); instr.src_count = 1; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_loop(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) +static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) { struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_LOOP, };
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr);
- write_sm4_block(ctx, buffer, &loop->body); + write_sm4_block(tpf, &loop->body);
instr.opcode = VKD3D_SM4_OP_ENDLOOP; - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) +static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, + unsigned int swizzle, const struct hlsl_ir_node *texel_offset) { struct sm4_src_register *src; struct sm4_instruction instr; @@ -4895,9 +5279,9 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { - if (ctx->profile->major_version < 5) + if (tpf->ctx->profile->major_version < 5) { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); return; } @@ -4906,58 +5290,39 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer } }
- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask);
src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf->ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; src->swizzle = swizzle;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_resource_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) +static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) { - const struct hlsl_type *resource_type = load->resource.var->data_type; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *sample_index = load->sample_index.node; const struct hlsl_ir_node *coords = load->coords.node;
- if (!hlsl_type_is_resource(resource_type)) + if (load->sampler.var && !load->sampler.var->is_uniform) { - hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); return; }
- if (load->sampler.var) - { - const struct hlsl_type *sampler_type = load->sampler.var->data_type; - - if (!hlsl_type_is_resource(sampler_type)) - { - hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); - return; - } - - if (!load->sampler.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - } - if (!load->resource.var->is_uniform) { - hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); return; }
switch (load->load_type) { case HLSL_RESOURCE_LOAD: - write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, + write_sm4_ld(tpf, &load->node, &load->resource, coords, sample_index, texel_offset, load->sampling_dim); break;
@@ -4967,64 +5332,61 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, case HLSL_RESOURCE_SAMPLE_LOD: case HLSL_RESOURCE_SAMPLE_LOD_BIAS: case HLSL_RESOURCE_SAMPLE_GRAD: - if (!load->sampler.var) - { - hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); - return; - } - write_sm4_sample(ctx, buffer, load); + /* Combined sample expressions were lowered. */ + assert(load->sampler.var); + write_sm4_sample(tpf, load); break;
case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); + write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(X, X, X, X), texel_offset); break;
case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); break;
case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); break;
case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); + write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(W, W, W, W), texel_offset); + break; + + case HLSL_RESOURCE_SAMPLE_INFO: + write_sm4_sampleinfo(tpf, load); + break; + + case HLSL_RESOURCE_RESINFO: + write_sm4_resinfo(tpf, load); break; } }
-static void write_sm4_resource_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) +static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) { - const struct hlsl_type *resource_type = store->resource.var->data_type; - - if (!hlsl_type_is_resource(resource_type)) - { - hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); - return; - } + struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource);
if (!store->resource.var->is_uniform) { - hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); return; }
if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) { - hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); + hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); return; }
- write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); + write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); }
-static void write_sm4_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) +static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) { const struct hlsl_ir_node *rhs = store->rhs.node; struct sm4_instruction instr; @@ -5033,18 +5395,17 @@ static void write_sm4_store(struct hlsl_ctx *ctx, memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); instr.src_count = 1;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_swizzle(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) +static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) { struct sm4_instruction instr; unsigned int writemask; @@ -5060,11 +5421,10 @@ static void write_sm4_swizzle(struct hlsl_ctx *ctx, swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); instr.src_count = 1;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block) +static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) { const struct hlsl_ir_node *instr;
@@ -5074,12 +5434,12 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * { if (instr->data_type->class == HLSL_CLASS_MATRIX) { - hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); + hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); break; } else if (instr->data_type->class == HLSL_CLASS_OBJECT) { - hlsl_fixme(ctx, &instr->loc, "Object copy."); + hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); break; }
@@ -5099,43 +5459,43 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * vkd3d_unreachable();
case HLSL_IR_EXPR: - write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); + write_sm4_expr(tpf, hlsl_ir_expr(instr)); break;
case HLSL_IR_IF: - write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); + write_sm4_if(tpf, hlsl_ir_if(instr)); break;
case HLSL_IR_JUMP: - write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); + write_sm4_jump(tpf, hlsl_ir_jump(instr)); break;
case HLSL_IR_LOAD: - write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); + write_sm4_load(tpf, hlsl_ir_load(instr)); break;
case HLSL_IR_RESOURCE_LOAD: - write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); + write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); break;
case HLSL_IR_RESOURCE_STORE: - write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); + write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); break;
case HLSL_IR_LOOP: - write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); + write_sm4_loop(tpf, hlsl_ir_loop(instr)); break;
case HLSL_IR_STORE: - write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); + write_sm4_store(tpf, hlsl_ir_store(instr)); break;
case HLSL_IR_SWIZZLE: - write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); + write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); break;
default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); } } } @@ -5144,12 +5504,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) { const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; unsigned int extern_resources_count, i; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var; size_t token_count_position; + struct tpf_writer tpf;
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { @@ -5164,6 +5525,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, VKD3D_SM4_LIB, };
+ tpf_writer_init(&tpf, ctx, &buffer); + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); @@ -5172,45 +5535,42 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - write_sm4_dcl_constant_buffer(&buffer, cbuffer); + write_sm4_dcl_constant_buffer(&tpf, cbuffer); }
for (i = 0; i < extern_resources_count; ++i) { - enum hlsl_regset regset; - - var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); + const struct extern_resource *resource = &extern_resources[i];
- if (regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&buffer, var); - else if (regset == HLSL_REGSET_TEXTURES) - write_sm4_dcl_textures(ctx, &buffer, var, false); - else if (regset == HLSL_REGSET_UAVS) - write_sm4_dcl_textures(ctx, &buffer, var, true); + if (resource->regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(&tpf, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(&tpf, resource, true); }
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) - write_sm4_dcl_semantic(ctx, &buffer, var); + write_sm4_dcl_semantic(&tpf, var); }
if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(&buffer, ctx->thread_count); + write_sm4_dcl_thread_group(&tpf, ctx->thread_count);
if (ctx->temp_count) - write_sm4_dcl_temps(&buffer, ctx->temp_count); + write_sm4_dcl_temps(&tpf, ctx->temp_count);
- write_sm4_block(ctx, &buffer, &entry_func->body); + write_sm4_block(&tpf, &entry_func->body);
- write_sm4_ret(&buffer); + write_sm4_ret(&tpf);
set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t));
- add_section(dxbc, TAG_SHDR, &buffer); + add_section(ctx, dxbc, TAG_SHDR, &buffer);
- vkd3d_free(extern_resources); + sm4_free_extern_resources(extern_resources, extern_resources_count); }
int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index b8f36df07f1..3ad8ba82020 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -413,6 +413,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t return "hlsl"; case VKD3D_SHADER_SOURCE_D3D_BYTECODE: return "d3dbc"; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + return "dxil"; default: FIXME("Unhandled source type %#x.\n", type); return "bin"; @@ -438,6 +440,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, shader_get_source_type_suffix(source_type), shader->code, shader->size); }
+static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) +{ + struct vkd3d_shader_scan_signature_info *signature_info; + + if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) + { + memset(&signature_info->input, 0, sizeof(signature_info->input)); + memset(&signature_info->output, 0, sizeof(signature_info->output)); + memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); + } +} + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vkd3d_shader_message_context *message_context, const char *source_name, const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, @@ -524,9 +538,46 @@ void vkd3d_shader_free_messages(char *messages) vkd3d_free(messages); }
+static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, + const struct shader_signature *src) +{ + unsigned int i; + + signature->element_count = src->element_count; + if (!src->elements) + { + assert(!signature->element_count); + signature->elements = NULL; + return true; + } + + if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) + return false; + + for (i = 0; i < signature->element_count; ++i) + { + struct vkd3d_shader_signature_element *d = &signature->elements[i]; + struct signature_element *e = &src->elements[i]; + + d->semantic_name = e->semantic_name; + d->semantic_index = e->semantic_index; + d->stream_index = e->stream_index; + d->sysval_semantic = e->sysval_semantic; + d->component_type = e->component_type; + d->register_index = e->register_index; + if (e->register_count > 1) + FIXME("Arrayed elements are not supported yet.\n"); + d->mask = e->mask; + d->used_mask = e->used_mask; + d->min_precision = e->min_precision; + } + + return true; +} + struct vkd3d_shader_scan_context { - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; size_t descriptors_size;
struct vkd3d_shader_message_context *message_context; @@ -546,20 +597,12 @@ struct vkd3d_shader_scan_context size_t cf_info_size; size_t cf_info_count;
- struct - { - unsigned int id; - unsigned int descriptor_idx; - } *uav_ranges; - size_t uav_ranges_size; - size_t uav_range_count; - enum vkd3d_shader_api_version api_version; };
static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, struct vkd3d_shader_message_context *message_context) { unsigned int i; @@ -582,7 +625,6 @@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con
static void vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) { - vkd3d_free(context->uav_ranges); vkd3d_free(context->cf_info); }
@@ -650,18 +692,23 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf return NULL; }
-static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( - const struct vkd3d_shader_scan_context *context, unsigned int range_id) +static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg, uint32_t flag) { + unsigned int range_id = reg->idx[0].offset; unsigned int i;
- for (i = 0; i < context->uav_range_count; ++i) + if (!context->scan_descriptor_info) + return; + + for (i = 0; i < context->scan_descriptor_info->descriptor_count; ++i) { - if (context->uav_ranges[i].id == range_id) - return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; + if (context->scan_descriptor_info->descriptors[i].register_id == range_id) + { + context->scan_descriptor_info->descriptors[i].flags |= flag; + break; + } } - - return NULL; }
static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) @@ -677,13 +724,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_register *reg) { - struct vkd3d_shader_descriptor_info *d; - - if (!context->scan_descriptor_info) - return; - - d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); - d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; + vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); }
static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) @@ -696,13 +737,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_register *reg) { - struct vkd3d_shader_descriptor_info *d; - - if (!context->scan_descriptor_info) - return; - - d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); - d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; + vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); }
static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) @@ -715,93 +750,76 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_ static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_register *reg) { - struct vkd3d_shader_descriptor_info *d; - - if (!context->scan_descriptor_info) - return; - - d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); - d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS; + vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); }
-static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, - enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, - enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, - unsigned int flags) +static struct vkd3d_shader_descriptor_info1 *vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, + enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, + const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, + enum vkd3d_shader_resource_data_type resource_data_type) { - struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; - struct vkd3d_shader_descriptor_info *d; + struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; + struct vkd3d_shader_descriptor_info1 *d;
if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, info->descriptor_count + 1, sizeof(*info->descriptors))) { ERR("Failed to allocate descriptor info.\n"); - return false; + return NULL; }
d = &info->descriptors[info->descriptor_count]; + memset(d, 0, sizeof(*d)); d->type = type; + d->register_id = reg->idx[0].offset; d->register_space = range->space; d->register_index = range->first; d->resource_type = resource_type; d->resource_data_type = resource_data_type; - d->flags = flags; d->count = (range->last == ~0u) ? ~0u : range->last - range->first + 1; ++info->descriptor_count;
- return true; -} - -static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, - unsigned int id, unsigned int descriptor_idx) -{ - if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, - context->uav_range_count + 1, sizeof(*context->uav_ranges))) - { - ERR("Failed to allocate UAV range.\n"); - return false; - } - - context->uav_ranges[context->uav_range_count].id = id; - context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; - ++context->uav_range_count; - - return true; + return d; }
static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; + struct vkd3d_shader_descriptor_info1 *d;
if (!context->scan_descriptor_info) return;
- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); + if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, + &cb->src.reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) + return; + d->buffer_size = cb->size * 16; }
static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; - unsigned int flags; + struct vkd3d_shader_descriptor_info1 *d;
if (!context->scan_descriptor_info) return;
+ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, + &sampler->src.reg, &sampler->range, VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT))) + return; + if (instruction->flags & VKD3DSI_SAMPLER_COMPARISON_MODE) - flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; - else - flags = 0; - vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, - VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); + d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; }
static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, - enum vkd3d_shader_resource_data_type resource_data_type) + enum vkd3d_shader_resource_data_type resource_data_type, + unsigned int sample_count, unsigned int structure_stride, bool raw) { + struct vkd3d_shader_descriptor_info1 *d; enum vkd3d_shader_descriptor_type type;
if (!context->scan_descriptor_info) @@ -811,10 +829,13 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; else type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; - vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); - if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) - vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, - context->scan_descriptor_info->descriptor_count - 1); + if (!(d = vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, + &resource->range, resource_type, resource_data_type))) + return; + d->sample_count = sample_count; + d->structure_stride = structure_stride; + if (raw) + d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; }
static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, @@ -873,7 +894,7 @@ static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_sca }
vkd3d_shader_scan_resource_declaration(context, &semantic->resource, - semantic->resource_type, resource_data_type); + semantic->resource_type, resource_data_type, semantic->sample_count, 0, false); }
static void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, @@ -907,12 +928,13 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte case VKD3DSIH_DCL_RESOURCE_RAW: case VKD3DSIH_DCL_UAV_RAW: vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.raw_resource.resource, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, 0, true); break; case VKD3DSIH_DCL_RESOURCE_STRUCTURED: case VKD3DSIH_DCL_UAV_STRUCTURED: vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.structured_resource.resource, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, + instruction->declaration.structured_resource.byte_stride, false); break; case VKD3DSIH_IF: cf_info = vkd3d_shader_scan_push_cf_info(context); @@ -1064,22 +1086,64 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte return VKD3D_OK; }
+static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descriptor_info *info, + const struct vkd3d_shader_scan_descriptor_info1 *info1) +{ + unsigned int i; + + if (!(info->descriptors = vkd3d_calloc(info1->descriptor_count, sizeof(*info->descriptors)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < info1->descriptor_count; ++i) + { + const struct vkd3d_shader_descriptor_info1 *src = &info1->descriptors[i]; + struct vkd3d_shader_descriptor_info *dst = &info->descriptors[i]; + + dst->type = src->type; + dst->register_space = src->register_space; + dst->register_index = src->register_index; + dst->resource_type = src->resource_type; + dst->resource_data_type = src->resource_data_type; + dst->flags = src->flags; + dst->count = src->count; + } + info->descriptor_count = info1->descriptor_count; + + return VKD3D_OK; +} + +static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) +{ + TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); + + vkd3d_free(scan_descriptor_info->descriptors); +} + static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) + struct vkd3d_shader_message_context *message_context, + struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) { - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; + struct vkd3d_shader_scan_descriptor_info *descriptor_info; + struct vkd3d_shader_scan_signature_info *signature_info; struct vkd3d_shader_instruction *instruction; struct vkd3d_shader_scan_context context; int ret = VKD3D_OK; unsigned int i;
- if ((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) + descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); + if (descriptor_info1) + { + descriptor_info1->descriptors = NULL; + descriptor_info1->descriptor_count = 0; + } + else if (descriptor_info) { - scan_descriptor_info->descriptors = NULL; - scan_descriptor_info->descriptor_count = 0; + descriptor_info1 = &local_descriptor_info1; } + signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO);
- vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); + vkd3d_shader_scan_context_init(&context, compile_info, descriptor_info1, message_context);
if (TRACE_ON()) { @@ -1090,13 +1154,52 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info { instruction = &parser->instructions.elements[i]; if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) - { - if (scan_descriptor_info) - vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); break; + } + + for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) + { + unsigned int size = parser->shader_desc.flat_constant_count[i].external; + struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; + struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; + struct vkd3d_shader_descriptor_info1 *d; + + if (parser->shader_desc.flat_constant_count[i].external) + { + if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &reg, + &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) + d->buffer_size = size * 16; } }
+ if (!ret && signature_info) + { + if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, + &parser->shader_desc.output_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, + &parser->shader_desc.patch_constant_signature)) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + } + } + + if (!ret && descriptor_info) + ret = convert_descriptor_info(descriptor_info, descriptor_info1); + + if (ret < 0) + { + if (descriptor_info) + vkd3d_shader_free_scan_descriptor_info(descriptor_info); + if (descriptor_info1) + vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); + if (signature_info) + vkd3d_shader_free_scan_signature_info(signature_info); + } + else + { + vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); + } vkd3d_shader_scan_context_cleanup(&context); return ret; } @@ -1113,7 +1216,7 @@ static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, return ret; }
- ret = scan_with_parser(compile_info, message_context, parser); + ret = scan_with_parser(compile_info, message_context, NULL, parser); vkd3d_shader_parser_destroy(parser);
return ret; @@ -1131,7 +1234,25 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, return ret; }
- ret = scan_with_parser(compile_info, message_context, parser); + ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +} + +static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + ret = scan_with_parser(compile_info, message_context, NULL, parser); vkd3d_shader_parser_destroy(parser);
return ret; @@ -1150,6 +1271,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) return ret;
+ init_scan_signature_info(compile_info); + vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
switch (compile_info->source_type) @@ -1167,6 +1290,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char ret = scan_d3dbc(compile_info, &message_context); break;
+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = scan_dxil(compile_info, &message_context); + break; + default: ERR("Unsupported source type %#x.\n", compile_info->source_type); ret = VKD3D_ERROR_INVALID_ARGUMENT; @@ -1184,7 +1311,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { - struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; + struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; struct vkd3d_glsl_generator *glsl_generator; struct vkd3d_shader_compile_info scan_info; int ret; @@ -1192,11 +1319,8 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source);
scan_info = *compile_info; - scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; - scan_descriptor_info.next = scan_info.next; - scan_info.next = &scan_descriptor_info;
- if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) + if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) return ret;
switch (compile_info->target_type) @@ -1210,7 +1334,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, message_context, &parser->location))) { ERR("Failed to create GLSL generator.\n"); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); return VKD3D_ERROR; }
@@ -1228,7 +1352,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, assert(0); }
- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); return ret; }
@@ -1289,6 +1413,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ return VKD3D_ERROR; }
+static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); + + vkd3d_shader_parser_destroy(parser); + return ret; +} + int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { @@ -1303,6 +1445,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) return ret;
+ init_scan_signature_info(compile_info); + vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
switch (compile_info->source_type) @@ -1319,6 +1463,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, ret = compile_d3d_bytecode(compile_info, out, &message_context); break;
+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = compile_dxbc_dxil(compile_info, out, &message_context); + break; + default: vkd3d_unreachable(); } @@ -1337,6 +1485,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ vkd3d_free(scan_descriptor_info->descriptors); }
+void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) +{ + TRACE("info %p.\n", info); + + vkd3d_shader_free_shader_signature(&info->input); + vkd3d_shader_free_shader_signature(&info->output); + vkd3d_shader_free_shader_signature(&info->patch_constant); +} + void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) { TRACE("shader_code %p.\n", shader_code); @@ -1399,43 +1556,6 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu desc->version = 0; }
-static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, - const struct shader_signature *src) -{ - unsigned int i; - - signature->element_count = src->element_count; - if (!src->elements) - { - assert(!signature->element_count); - signature->elements = NULL; - return true; - } - - if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) - return false; - - for (i = 0; i < signature->element_count; ++i) - { - struct vkd3d_shader_signature_element *d = &signature->elements[i]; - struct signature_element *e = &src->elements[i]; - - d->semantic_name = e->semantic_name; - d->semantic_index = e->semantic_index; - d->stream_index = e->stream_index; - d->sysval_semantic = e->sysval_semantic; - d->component_type = e->component_type; - d->register_index = e->register_index; - if (e->register_count > 1) - FIXME("Arrayed elements are not supported yet.\n"); - d->mask = e->mask; - d->used_mask = e->used_mask; - d->min_precision = e->min_precision; - } - - return true; -} - void shader_signature_cleanup(struct shader_signature *signature) { vkd3d_free(signature->elements); @@ -1524,6 +1644,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns VKD3D_SHADER_SOURCE_DXBC_TPF, VKD3D_SHADER_SOURCE_HLSL, VKD3D_SHADER_SOURCE_D3D_BYTECODE, + VKD3D_SHADER_SOURCE_DXBC_DXIL, };
TRACE("count %p.\n", count); @@ -1562,6 +1683,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
switch (source_type) { + case VKD3D_SHADER_SOURCE_DXBC_DXIL: case VKD3D_SHADER_SOURCE_DXBC_TPF: *count = ARRAY_SIZE(dxbc_tpf_types); return dxbc_tpf_types; @@ -1790,3 +1912,41 @@ void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *ins vkd3d_free(instructions->icbs[i]); vkd3d_free(instructions->icbs); } + +void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, + const struct vkd3d_shader_signature *input_signature, + unsigned int *ret_count, struct vkd3d_shader_varying_map *varyings) +{ + unsigned int count = 0; + unsigned int i; + + TRACE("output_signature %p, input_signature %p, ret_count %p, varyings %p.\n", + output_signature, input_signature, ret_count, varyings); + + for (i = 0; i < input_signature->element_count; ++i) + { + const struct vkd3d_shader_signature_element *input_element, *output_element; + + input_element = &input_signature->elements[i]; + + if (input_element->sysval_semantic != VKD3D_SHADER_SV_NONE) + continue; + + varyings[count].input_register_index = input_element->register_index; + varyings[count].input_mask = input_element->mask; + + if ((output_element = vkd3d_shader_find_signature_element(output_signature, + input_element->semantic_name, input_element->semantic_index, 0))) + { + varyings[count].output_signature_index = output_element - output_signature->elements; + } + else + { + varyings[count].output_signature_index = output_signature->element_count; + } + + ++count; + } + + *ret_count = count; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index d77c5393940..4a2f6b96b82 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -78,9 +78,14 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, + 
VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009,
VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK = 1302, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE = 1303,
VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, @@ -88,6 +93,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED = 2003, VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED = 2004,
+ VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, + VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE = 3002, @@ -133,10 +140,14 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, + VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, + VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, + VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, + VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305,
VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000,
@@ -145,8 +156,31 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, + VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, + VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006,
VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, + + VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY = 8000, + VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE = 8001, + VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET = 8002, + VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE = 8003, + VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE = 8004, + VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT = 8005, + VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE = 8006, + VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB = 8007, + VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT = 8008, + VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, + VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, + VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, + + VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, + VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, + VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, + VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, + VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, + + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, };
enum vkd3d_shader_opcode @@ -486,6 +520,9 @@ enum vkd3d_shader_register_type VKD3DSPR_DEPTHOUTLE, VKD3DSPR_RASTERIZER, VKD3DSPR_OUTSTENCILREF, + VKD3DSPR_UNDEF, + + VKD3DSPR_COUNT,
VKD3DSPR_INVALID = ~0u, }; @@ -516,6 +553,7 @@ enum vkd3d_data_type VKD3D_DATA_DOUBLE, VKD3D_DATA_CONTINUED, VKD3D_DATA_UNUSED, + VKD3D_DATA_UINT8, };
enum vkd3d_immconst_type @@ -784,6 +822,8 @@ enum vkd3d_shader_input_sysval_semantic VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, };
+#define SIGNATURE_TARGET_LOCATION_UNUSED (~0u) + struct signature_element { unsigned int sort_index; @@ -792,16 +832,21 @@ struct signature_element unsigned int stream_index; enum vkd3d_shader_sysval_semantic sysval_semantic; enum vkd3d_shader_component_type component_type; + /* Register index in the source shader. */ unsigned int register_index; unsigned int register_count; unsigned int mask; unsigned int used_mask; enum vkd3d_shader_minimum_precision min_precision; + /* Register index / location in the target shader. + * If SIGNATURE_TARGET_LOCATION_UNUSED, this element should not be written. */ + unsigned int target_location; };
struct shader_signature { struct signature_element *elements; + size_t elements_capacity; unsigned int element_count; };
@@ -811,9 +856,17 @@ struct vkd3d_shader_desc { const uint32_t *byte_code; size_t byte_code_size; + bool is_dxil; struct shader_signature input_signature; struct shader_signature output_signature; struct shader_signature patch_constant_signature; + + uint32_t temp_count; + + struct + { + uint32_t used, external; + } flat_constant_count[3]; };
struct vkd3d_shader_register_semantic @@ -945,6 +998,8 @@ struct vkd3d_shader_instruction } declaration; };
+void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx); + static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) { return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; @@ -965,6 +1020,11 @@ static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_sh return reg->type == VKD3DSPR_PATCHCONST; }
+static inline bool register_is_constant(const struct vkd3d_shader_register *reg) +{ + return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); +} + struct vkd3d_shader_location { const char *source_name; @@ -1066,6 +1126,27 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse parser->ops->parser_destroy(parser); }
+struct vkd3d_shader_descriptor_info1 +{ + enum vkd3d_shader_descriptor_type type; + unsigned int register_space; + unsigned int register_index; + unsigned int register_id; + enum vkd3d_shader_resource_type resource_type; + enum vkd3d_shader_resource_data_type resource_data_type; + unsigned int flags; + unsigned int sample_count; + unsigned int buffer_size; + unsigned int structure_stride; + unsigned int count; +}; + +struct vkd3d_shader_scan_descriptor_info1 +{ + struct vkd3d_shader_descriptor_info1 *descriptors; + unsigned int descriptor_count; +}; + void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_version *shader_version);
@@ -1167,6 +1248,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser);
void free_shader_desc(struct vkd3d_shader_desc *desc);
@@ -1186,7 +1269,7 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); #define SPIRV_MAX_SRC_COUNT 6
int spirv_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
@@ -1240,6 +1323,30 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( } }
+static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resource_data_type( + enum vkd3d_shader_resource_data_type data_type) +{ + switch (data_type) + { + case VKD3D_SHADER_RESOURCE_DATA_FLOAT: + case VKD3D_SHADER_RESOURCE_DATA_UNORM: + case VKD3D_SHADER_RESOURCE_DATA_SNORM: + return VKD3D_SHADER_COMPONENT_FLOAT; + case VKD3D_SHADER_RESOURCE_DATA_UINT: + return VKD3D_SHADER_COMPONENT_UINT; + case VKD3D_SHADER_RESOURCE_DATA_INT: + return VKD3D_SHADER_COMPONENT_INT; + case VKD3D_SHADER_RESOURCE_DATA_DOUBLE: + case VKD3D_SHADER_RESOURCE_DATA_CONTINUED: + return VKD3D_SHADER_COMPONENT_DOUBLE; + default: + FIXME("Unhandled data type %#x.\n", data_type); + /* fall-through */ + case VKD3D_SHADER_RESOURCE_DATA_MIXED: + return VKD3D_SHADER_COMPONENT_UINT; + } +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index);
@@ -1339,6 +1446,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, }
#define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) +#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t)
#define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') @@ -1369,11 +1477,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code);
-enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); -enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( - struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); -enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, - enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, - struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); +enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info);
#endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 32439eec7eb..42a98763438 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, uint64_t value); +static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any);
@@ -453,9 +454,9 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( }
/* ID3D12Fence */ -static struct d3d12_fence *impl_from_ID3D12Fence(ID3D12Fence *iface) +static struct d3d12_fence *impl_from_ID3D12Fence1(ID3D12Fence1 *iface) { - return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface); + return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence1_iface); }
static VkResult d3d12_fence_create_vk_fence(struct d3d12_fence *fence, VkFence *vk_fence) @@ -899,18 +900,19 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin vkd3d_mutex_unlock(&fence->mutex); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence1 *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);
- if (IsEqualGUID(riid, &IID_ID3D12Fence) + if (IsEqualGUID(riid, &IID_ID3D12Fence1) + || IsEqualGUID(riid, &IID_ID3D12Fence) || IsEqualGUID(riid, &IID_ID3D12Pageable) || IsEqualGUID(riid, &IID_ID3D12DeviceChild) || IsEqualGUID(riid, &IID_ID3D12Object) || IsEqualGUID(riid, &IID_IUnknown)) { - ID3D12Fence_AddRef(iface); + ID3D12Fence1_AddRef(iface); *object = iface; return S_OK; } @@ -921,9 +923,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, return E_NOINTERFACE; }
-static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence *iface) +static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence1 *iface) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); ULONG refcount = InterlockedIncrement(&fence->refcount);
TRACE("%p increasing refcount to %u.\n", fence, refcount); @@ -936,9 +938,9 @@ static void d3d12_fence_incref(struct d3d12_fence *fence) InterlockedIncrement(&fence->internal_refcount); }
-static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) +static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence1 *iface) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); ULONG refcount = InterlockedDecrement(&fence->refcount);
TRACE("%p decreasing refcount to %u.\n", fence, refcount); @@ -971,10 +973,10 @@ static void d3d12_fence_decref(struct d3d12_fence *fence) } }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence1 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -982,10 +984,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, return vkd3d_get_private_data(&fence->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence1 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -993,37 +995,37 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, return vkd3d_set_private_data(&fence->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence1 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);
return vkd3d_set_private_data_interface(&fence->private_store, guid, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence1 *iface, const WCHAR *name) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size));
return name ? S_OK : E_INVALIDARG; }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence *iface, REFIID iid, void **device) +static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence1 *iface, REFIID iid, void **device) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);
return d3d12_device_query_interface(fence->device, iid, device); }
-static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface) +static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence1 *iface) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); uint64_t completed_value;
TRACE("iface %p.\n", iface); @@ -1034,10 +1036,10 @@ static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface return completed_value; }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence1 *iface, UINT64 value, HANDLE event) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); unsigned int i; bool latch = false;
@@ -1105,9 +1107,9 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen return d3d12_device_flush_blocked_queues(fence->device); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) +static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence1 *iface, UINT64 value) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, value %#"PRIx64".\n", iface, value);
@@ -1116,7 +1118,16 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 v return d3d12_fence_signal(fence, value, VK_NULL_HANDLE, true); }
-static const struct ID3D12FenceVtbl d3d12_fence_vtbl = +static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(ID3D12Fence1 *iface) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p.\n", iface); + + return fence->flags; +} + +static const struct ID3D12Fence1Vtbl d3d12_fence_vtbl = { /* IUnknown methods */ d3d12_fence_QueryInterface, @@ -1133,14 +1144,18 @@ static const struct ID3D12FenceVtbl d3d12_fence_vtbl = d3d12_fence_GetCompletedValue, d3d12_fence_SetEventOnCompletion, d3d12_fence_Signal, + /* ID3D12Fence1 methods */ + d3d12_fence_GetCreationFlags, };
static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) { - if (!iface) + ID3D12Fence1 *iface1; + + if (!(iface1 = (ID3D12Fence1 *)iface)) return NULL; - assert(iface->lpVtbl == &d3d12_fence_vtbl); - return impl_from_ID3D12Fence(iface); + assert(iface1->lpVtbl == &d3d12_fence_vtbl); + return impl_from_ID3D12Fence1(iface1); }
static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, @@ -1150,7 +1165,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * VkResult vr; HRESULT hr;
- fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl; + fence->ID3D12Fence1_iface.lpVtbl = &d3d12_fence_vtbl; fence->internal_refcount = 1; fence->refcount = 1;
@@ -1161,7 +1176,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *
vkd3d_cond_init(&fence->null_event_cond);
- if (flags) + if ((fence->flags = flags)) FIXME("Ignoring flags %#x.\n", flags);
fence->events = NULL; @@ -1315,32 +1330,26 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm return hr; }
- allocator->current_command_list = list; - - return S_OK; -} - -static void d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator, - struct d3d12_command_list *list) -{ - struct d3d12_device *device = allocator->device; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - TRACE("allocator %p, list %p.\n", allocator, list); - - if (allocator->current_command_list == list) - allocator->current_command_list = NULL; - if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size, allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers))) { WARN("Failed to add command buffer.\n"); VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, 1, &list->vk_command_buffer)); - return; + return E_OUTOFMEMORY; } - allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer; + + allocator->current_command_list = list; + + return S_OK; +} + +static void d3d12_command_allocator_remove_command_list(struct d3d12_command_allocator *allocator, + const struct d3d12_command_list *list) +{ + if (allocator->current_command_list == list) + allocator->current_command_list = NULL; }
static bool d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass) @@ -1910,10 +1919,32 @@ HRESULT d3d12_command_allocator_create(struct d3d12_device *device, return S_OK; }
+static void d3d12_command_signature_incref(struct d3d12_command_signature *signature) +{ + vkd3d_atomic_increment(&signature->internal_refcount); +} + +static void d3d12_command_signature_decref(struct d3d12_command_signature *signature) +{ + unsigned int refcount = vkd3d_atomic_decrement(&signature->internal_refcount); + + if (!refcount) + { + struct d3d12_device *device = signature->device; + + vkd3d_private_store_destroy(&signature->private_store); + + vkd3d_free((void *)signature->desc.pArgumentDescs); + vkd3d_free(signature); + + d3d12_device_release(device); + } +} + /* ID3D12CommandList */ -static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList2(ID3D12GraphicsCommandList2 *iface) +static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList3(ID3D12GraphicsCommandList3 *iface) { - return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); + return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); }
static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) @@ -2259,12 +2290,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l } }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **object) { TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object);
- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) + if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) || IsEqualGUID(iid, &IID_ID3D12CommandList) @@ -2272,7 +2304,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic || IsEqualGUID(iid, &IID_ID3D12Object) || IsEqualGUID(iid, &IID_IUnknown)) { - ID3D12GraphicsCommandList2_AddRef(iface); + ID3D12GraphicsCommandList3_AddRef(iface); *object = iface; return S_OK; } @@ -2283,9 +2315,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic return E_NOINTERFACE; }
-static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList2 *iface) +static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList3 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); ULONG refcount = InterlockedIncrement(&list->refcount);
TRACE("%p increasing refcount to %u.\n", list, refcount); @@ -2298,9 +2330,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind vkd3d_free(bindings->vk_uav_counter_views); }
-static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList2 *iface) +static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList3 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); ULONG refcount = InterlockedDecrement(&list->refcount);
TRACE("%p decreasing refcount to %u.\n", list, refcount); @@ -2313,7 +2345,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL
/* When command pool is destroyed, all command buffers are implicitly freed. */ if (list->allocator) - d3d12_command_allocator_free_command_buffer(list->allocator, list); + d3d12_command_allocator_remove_command_list(list->allocator, list);
vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); @@ -2326,66 +2358,66 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL return refcount; }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList3 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);
return vkd3d_get_private_data(&list->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList3 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);
return vkd3d_set_private_data(&list->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList3 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);
return vkd3d_set_private_data_interface(&list->private_store, guid, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList2 *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList3 *iface, const WCHAR *name) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size));
return name ? S_OK : E_INVALIDARG; }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList2 *iface, REFIID iid, void **device) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **device) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);
return d3d12_device_query_interface(list->device, iid, device); }
-static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList2 *iface) +static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList3 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p.\n", iface);
return list->type; }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList2 *iface) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList3 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs; VkResult vr;
@@ -2411,7 +2443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL
if (list->allocator) { - d3d12_command_allocator_free_command_buffer(list->allocator, list); + d3d12_command_allocator_remove_command_list(list->allocator, list); list->allocator = NULL; }
@@ -2429,7 +2461,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL static void d3d12_command_list_reset_state(struct d3d12_command_list *list, ID3D12PipelineState *initial_pipeline_state) { - ID3D12GraphicsCommandList2 *iface = &list->ID3D12GraphicsCommandList2_iface; + ID3D12GraphicsCommandList3 *iface = &list->ID3D12GraphicsCommandList3_iface;
memset(list->strides, 0, sizeof(list->strides)); list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; @@ -2465,14 +2497,14 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
list->descriptor_heap_count = 0;
- ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); + ID3D12GraphicsCommandList3_SetPipelineState(iface, initial_pipeline_state); }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList3 *iface, ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) { struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); HRESULT hr;
TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", @@ -2499,7 +2531,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL return hr; }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList3 *iface, ID3D12PipelineState *pipeline_state) { FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state); @@ -3185,6 +3217,23 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) } }
+static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) +{ + if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) + { + if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) + { + /* Descriptors can be written after binding. */ + FIXME("Flushing descriptor updates while list %p is not closed.\n", list); + vkd3d_mutex_lock(&heap->vk_sets_mutex); + d3d12_desc_flush_vk_heap_updates_locked(heap, list->device); + vkd3d_mutex_unlock(&heap->vk_sets_mutex); + return; + } + list->descriptor_heaps[list->descriptor_heap_count++] = heap; + } +} + static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) { @@ -3209,18 +3258,6 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l bindings->sampler_heap_id = heap->serial_id; }
- if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) - { - if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) - { - /* Descriptors can be written after binding. */ - FIXME("Flushing descriptor updates while list %p is not closed.\n", list); - command_list_flush_vk_heap_updates(list); - list->descriptor_heap_count = 0; - } - list->descriptor_heaps[list->descriptor_heap_count++] = heap; - } - vkd3d_mutex_lock(&heap->vk_sets_mutex);
for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) @@ -3353,11 +3390,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c } }
-static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList3 *iface, UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, UINT start_instance_location) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " @@ -3377,11 +3414,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom instance_count, start_vertex_location, start_instance_location)); }
-static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList3 *iface, UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, INT base_vertex_location, UINT start_instance_location) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " @@ -3403,10 +3440,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12Grap instance_count, start_vertex_location, base_vertex_location, start_instance_location)); }
-static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList3 *iface, UINT x, UINT y, UINT z) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); @@ -3422,10 +3459,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); }
-static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; VkBufferCopy buffer_copy; @@ -3624,7 +3661,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, - unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) + unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format, unsigned int layer_count) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; @@ -3651,6 +3688,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com buffer_image_copy.bufferImageHeight = 0; vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, src_format, src_sub_resource_idx, src_desc->MipLevels); + buffer_image_copy.imageSubresource.layerCount = layer_count; src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; buffer_image_copy.imageOffset.x = 0; buffer_image_copy.imageOffset.y = 0; @@ -3658,7 +3696,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx);
buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * - buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; + buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) { ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); @@ -3684,6 +3722,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com
vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, dst_format, dst_sub_resource_idx, dst_desc->MipLevels); + buffer_image_copy.imageSubresource.layerCount = layer_count; dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel;
assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == @@ -3705,11 +3744,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) && box->back > box->front; }
-static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList3 *iface, const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_format *src_format, *dst_format; const struct vkd3d_vk_device_procs *vk_procs; @@ -3813,7 +3852,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic { d3d12_command_list_copy_incompatible_texture_region(list, dst_resource, dst->u.SubresourceIndex, dst_format, - src_resource, src->u.SubresourceIndex, src_format); + src_resource, src->u.SubresourceIndex, src_format, 1); return; }
@@ -3830,11 +3869,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic } }
-static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst, ID3D12Resource *src) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_format *dst_format, *src_format; const struct vkd3d_vk_device_procs *vk_procs; VkBufferCopy vk_buffer_copy; VkImageCopy vk_image_copy; @@ -3867,16 +3907,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm else { layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); + dst_format = dst_resource->format; + src_format = src_resource->format;
assert(d3d12_resource_is_texture(dst_resource)); assert(d3d12_resource_is_texture(src_resource)); assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc));
+ if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) + { + for (i = 0; i < dst_resource->desc.MipLevels; ++i) + { + d3d12_command_list_copy_incompatible_texture_region(list, + dst_resource, i, dst_format, + src_resource, i, src_format, layer_count); + } + return; + } + for (i = 0; i < dst_resource->desc.MipLevels; ++i) { vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, - src_resource->format, dst_resource->format, NULL, 0, 0, 0); + src_format, dst_format, NULL, 0, 0, 0); vk_image_copy.dstSubresource.layerCount = layer_count; vk_image_copy.srcSubresource.layerCount = layer_count; VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, @@ -3886,7 +3939,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm } }
-static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, D3D12_TILE_COPY_FLAGS flags) @@ -3897,11 +3950,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand buffer, buffer_offset, flags); }
-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst, UINT dst_sub_resource_idx, ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_format *src_format, *dst_format, *vk_format; struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; @@ -3964,10 +4017,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); }
-static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList3 *iface, D3D12_PRIMITIVE_TOPOLOGY topology) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, topology %#x.\n", iface, topology);
@@ -3978,11 +4031,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr d3d12_command_list_invalidate_current_pipeline(list); }
-static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList3 *iface, UINT viewport_count, const D3D12_VIEWPORT *viewports) { VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs; unsigned int i;
@@ -4016,10 +4069,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); }
-static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList3 *iface, UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; const struct vkd3d_vk_device_procs *vk_procs; unsigned int i; @@ -4044,10 +4097,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); }
-static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList3 *iface, const FLOAT blend_factor[4]) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); @@ -4056,10 +4109,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); }
-static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList3 *iface, UINT stencil_ref) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); @@ -4068,11 +4121,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList3 *iface, ID3D12PipelineState *pipeline_state) { struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state);
@@ -4123,10 +4176,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA return 0; }
-static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList3 *iface, UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); bool have_aliasing_barriers = false, have_split_barriers = false; const struct vkd3d_vk_device_procs *vk_procs; const struct vkd3d_vulkan_info *vk_info; @@ -4349,13 +4402,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); }
-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList3 *iface, ID3D12GraphicsCommandList *command_list) { FIXME("iface %p, command_list %p stub!\n", iface, command_list); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList3 *iface, UINT heap_count, ID3D12DescriptorHeap *const *heaps) { TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); @@ -4381,10 +4434,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis d3d12_command_list_invalidate_root_parameters(list, bind_point); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList3 *iface, ID3D12RootSignature *root_signature) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_signature %p.\n", iface, root_signature);
@@ -4392,10 +4445,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G unsafe_impl_from_ID3D12RootSignature(root_signature)); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList3 *iface, ID3D12RootSignature *root_signature) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_signature %p.\n", iface, root_signature);
@@ -4408,6 +4461,7 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; const struct d3d12_root_signature *root_signature = bindings->root_signature; + struct d3d12_descriptor_heap *descriptor_heap; struct d3d12_desc *desc;
assert(root_signature_get_descriptor_table(root_signature, index)); @@ -4418,15 +4472,25 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l if (bindings->descriptor_tables[index] == desc) return;
+ descriptor_heap = d3d12_desc_get_descriptor_heap(desc); + if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) + { + /* GetGPUDescriptorHandleForHeapStart() returns a null handle in this case, + * but a CPU handle could be passed. */ + WARN("Descriptor heap %p is not shader visible.\n", descriptor_heap); + return; + } + command_list_add_descriptor_heap(list, descriptor_heap); + bindings->descriptor_tables[index] = desc; bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; bindings->descriptor_table_active_mask |= (uint64_t)1 << index; }
-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", iface, root_parameter_index, base_descriptor.ptr); @@ -4435,10 +4499,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I root_parameter_index, base_descriptor); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", iface, root_parameter_index, base_descriptor.ptr); @@ -4460,10 +4524,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, UINT data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", iface, root_parameter_index, data, dst_offset); @@ -4472,10 +4536,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 root_parameter_index, dst_offset, 1, &data); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, UINT data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", iface, root_parameter_index, data, dst_offset); @@ -4484,10 +4548,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID root_parameter_index, dst_offset, 1, &data); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", iface, root_parameter_index, constant_count, data, dst_offset); @@ -4496,10 +4560,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID root_parameter_index, dst_offset, constant_count, data); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", iface, root_parameter_index, constant_count, data, dst_offset); @@ -4561,9 +4625,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, }
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4572,9 +4636,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie }
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4633,9 +4697,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li }
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4645,9 +4709,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie }
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4657,9 +4721,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi }
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4669,9 +4733,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi }
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4680,10 +4744,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV root_parameter_index, address); }
-static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList3 *iface, const D3D12_INDEX_BUFFER_VIEW *view) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_resource *resource; enum VkIndexType index_type; @@ -4723,10 +4787,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics view->BufferLocation - resource->gpu_address, index_type)); }
-static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList3 *iface, UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_null_resources *null_resources; struct vkd3d_gpu_va_allocator *gpu_va_allocator; VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; @@ -4781,10 +4845,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi d3d12_command_list_invalidate_current_pipeline(list); }
-static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList3 *iface, UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; @@ -4846,11 +4910,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); }
-static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList3 *iface, UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct d3d12_rtv_desc *rtv_desc; const struct d3d12_dsv_desc *dsv_desc; VkFormat prev_dsv_format; @@ -5051,12 +5115,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, } }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList3 *iface, D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, UINT rect_count, const D3D12_RECT *rects) { const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); struct VkAttachmentDescription attachment_desc; struct VkAttachmentReference ds_reference; @@ -5100,10 +5164,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra &clear_value, rect_count, rects); }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList3 *iface, D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); struct VkAttachmentDescription attachment_desc; struct VkAttachmentReference color_reference; @@ -5348,11 +5412,11 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 } }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList3 *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct vkd3d_view *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; struct vkd3d_texture_view_desc view_desc; @@ -5414,11 +5478,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID vkd3d_view_decref(uint_view, device); }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList3 *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const float values[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *resource_impl; VkClearColorValue colour; struct vkd3d_view *view; @@ -5434,16 +5498,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); }
-static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) { FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); }
-static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList3 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); const struct vkd3d_vk_device_procs *vk_procs; VkQueryControlFlags flags = 0; @@ -5470,10 +5534,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); }
-static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList3 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); const struct vkd3d_vk_device_procs *vk_procs;
@@ -5515,12 +5579,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) return sizeof(uint64_t); }
-static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList3 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) { const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); const struct vkd3d_vk_device_procs *vk_procs; unsigned int i, first, count; @@ -5596,10 +5660,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics } }
-static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; const struct vkd3d_vk_device_procs *vk_procs; @@ -5668,19 +5732,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo } }
-static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList3 *iface, UINT metadata, const void *data, UINT size) { FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); }
-static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList3 *iface, UINT metadata, const void *data, UINT size) { FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); }
-static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList2 *iface) +static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList3 *iface) { FIXME("iface %p stub!\n", iface); } @@ -5689,14 +5753,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS));
-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList3 *iface, ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) { struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; const struct vkd3d_vk_device_procs *vk_procs; unsigned int i; @@ -5714,6 +5778,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC return; }
+ d3d12_command_signature_incref(sig_impl); + signature_desc = &sig_impl->desc; for (i = 0; i < signature_desc->NumArgumentDescs; ++i) { @@ -5776,6 +5842,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC if (!d3d12_command_list_update_compute_state(list)) { WARN("Failed to update compute state, ignoring dispatch.\n"); + d3d12_command_signature_decref(sig_impl); return; }
@@ -5788,9 +5855,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC break; } } + + d3d12_command_signature_decref(sig_impl); }
-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst_buffer, UINT64 dst_offset, ID3D12Resource *src_buffer, UINT64 src_offset, UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, @@ -5803,7 +5872,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); }
-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst_buffer, UINT64 dst_offset, ID3D12Resource *src_buffer, UINT64 src_offset, UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, @@ -5816,20 +5885,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); }
-static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList3 *iface, FLOAT min, FLOAT max) { FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList3 *iface, UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) { FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", iface, sample_count, pixel_count, sample_positions); }
-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, ID3D12Resource *src_resource, UINT src_sub_resource_idx, D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) @@ -5841,16 +5910,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 src_resource, src_sub_resource_idx, src_rect, format, mode); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList2 *iface, UINT mask) +static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList3 *iface, UINT mask) { FIXME("iface %p, mask %#x stub!\n", iface, mask); }
-static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList3 *iface, UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *resource; unsigned int i;
@@ -5863,7 +5932,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap } }
-static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = +static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList3 *iface, + ID3D12ProtectedResourceSession *protected_session) +{ + FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); +} + +static const struct ID3D12GraphicsCommandList3Vtbl d3d12_command_list_vtbl = { /* IUnknown methods */ d3d12_command_list_QueryInterface, @@ -5939,6 +6014,8 @@ static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = d3d12_command_list_SetViewInstanceMask, /* ID3D12GraphicsCommandList2 methods */ d3d12_command_list_WriteBufferImmediate, + /* ID3D12GraphicsCommandList3 methods */ + d3d12_command_list_SetProtectedResourceSession, };
static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) @@ -5946,7 +6023,7 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma if (!iface) return NULL; assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); - return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); + return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); }
static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, @@ -5955,7 +6032,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d { HRESULT hr;
- list->ID3D12GraphicsCommandList2_iface.lpVtbl = &d3d12_command_list_vtbl; + list->ID3D12GraphicsCommandList3_iface.lpVtbl = &d3d12_command_list_vtbl; list->refcount = 1;
list->type = type; @@ -6063,8 +6140,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if return refcount; }
+static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) +{ + switch (op->opcode) + { + case VKD3D_CS_OP_WAIT: + d3d12_fence_decref(op->u.wait.fence); + break; + + case VKD3D_CS_OP_SIGNAL: + d3d12_fence_decref(op->u.signal.fence); + break; + + case VKD3D_CS_OP_EXECUTE: + vkd3d_free(op->u.execute.buffers); + break; + + case VKD3D_CS_OP_UPDATE_MAPPINGS: + case VKD3D_CS_OP_COPY_MAPPINGS: + break; + } +} + static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) { + unsigned int i; + + for (i = 0; i < array->count; ++i) + d3d12_command_queue_destroy_op(&array->ops[i]); + vkd3d_free(array->ops); }
@@ -6162,17 +6266,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc return &array->ops[array->count++]; }
+static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) +{ + void *buffer; + + *dst = NULL; + if (src) + { + if (!(buffer = vkd3d_calloc(count, elem_size))) + return false; + memcpy(buffer, src, count * elem_size); + *dst = buffer; + } + return true; +} + +static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) +{ + vkd3d_free(update_mappings->region_start_coordinates); + vkd3d_free(update_mappings->region_sizes); + vkd3d_free(update_mappings->range_flags); + vkd3d_free(update_mappings->heap_range_offsets); + vkd3d_free(update_mappings->range_tile_counts); +} + static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *resource, UINT region_count, const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, - UINT *heap_range_offsets, UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) + const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) { - FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " + struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); + struct vkd3d_cs_update_mappings update_mappings = {0}; + struct vkd3d_cs_op_data *op; + + TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " - "range_tile_counts %p, flags %#x stub!\n", + "range_tile_counts %p, flags %#x.\n", iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, range_flags, heap_range_offsets, range_tile_counts, flags); + + if (!region_count || !range_count) 
+ return; + + if (!command_queue->supports_sparse_binding) + { + FIXME("Command queue %p does not support sparse binding.\n", command_queue); + return; + } + + if (!resource_impl->tiles.subresource_count) + { + WARN("Resource %p is not a tiled resource.\n", resource_impl); + return; + } + + if (region_count > 1 && !region_start_coordinates) + { + WARN("Region start coordinates must not be NULL when region count is > 1.\n"); + return; + } + + if (range_count > 1 && !range_tile_counts) + { + WARN("Range tile counts must not be NULL when range count is > 1.\n"); + return; + } + + update_mappings.resource = resource_impl; + update_mappings.heap = heap_impl; + if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, + region_start_coordinates, sizeof(*region_start_coordinates), region_count)) + { + ERR("Failed to allocate region start coordinates.\n"); + return; + } + if (!clone_array_parameter((void **)&update_mappings.region_sizes, + region_sizes, sizeof(*region_sizes), region_count)) + { + ERR("Failed to allocate region sizes.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.range_flags, + range_flags, sizeof(*range_flags), range_count)) + { + ERR("Failed to allocate range flags.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, + heap_range_offsets, sizeof(*heap_range_offsets), range_count)) + { + ERR("Failed to allocate heap range offsets.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, + range_tile_counts, sizeof(*range_tile_counts), range_count)) + { + ERR("Failed to allocate range tile counts.\n"); + goto free_clones; + } + update_mappings.region_count = region_count; + update_mappings.range_count = range_count; + update_mappings.flags = flags; + + vkd3d_mutex_lock(&command_queue->op_mutex); + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to 
add op.\n"); + goto unlock_mutex; + } + + op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; + op->u.update_mappings = update_mappings; + + d3d12_command_queue_submit_locked(command_queue); + + vkd3d_mutex_unlock(&command_queue->op_mutex); + return; + +unlock_mutex: + vkd3d_mutex_unlock(&command_queue->op_mutex); +free_clones: + update_mappings_cleanup(&update_mappings); }
static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, @@ -6183,10 +6401,36 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command const D3D12_TILE_REGION_SIZE *region_size, D3D12_TILE_MAPPING_FLAGS flags) { - FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " - "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", + struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); + struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct vkd3d_cs_op_data *op; + + TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " + "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", iface, dst_resource, dst_region_start_coordinate, src_resource, src_region_start_coordinate, region_size, flags); + + /* op_mutex is held from here until unlock_mutex; no exit path may bypass that label. */ + vkd3d_mutex_lock(&command_queue->op_mutex); + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to add op.\n"); + goto unlock_mutex; + } + op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; + op->u.copy_mappings.dst_resource = dst_resource_impl; + op->u.copy_mappings.src_resource = src_resource_impl; + op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; + op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; + op->u.copy_mappings.region_size = *region_size; + op->u.copy_mappings.flags = flags; + + d3d12_command_queue_submit_locked(command_queue); + +unlock_mutex: + vkd3d_mutex_unlock(&command_queue->op_mutex); }
static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, @@ -6214,8 +6456,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu ERR("Failed to submit queue(s), vr %d.\n", vr);
vkd3d_queue_release(vkd3d_queue); - - vkd3d_free(buffers); }
static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) @@ -6273,7 +6513,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { ERR("Failed to add op.\n"); - return; + goto done; } op->opcode = VKD3D_CS_OP_EXECUTE; op->u.execute.buffers = buffers; @@ -6281,6 +6521,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm
d3d12_command_queue_submit_locked(command_queue);
+done: vkd3d_mutex_unlock(&command_queue->op_mutex); return; } @@ -6348,6 +6589,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { + ERR("Failed to add op.\n"); hr = E_OUTOFMEMORY; goto done; } @@ -6686,6 +6928,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if
if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { + ERR("Failed to add op.\n"); hr = E_OUTOFMEMORY; goto done; } @@ -6922,22 +7165,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * return d3d12_command_queue_fixup_after_flush_locked(queue); } d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); - d3d12_fence_decref(fence); break;
case VKD3D_CS_OP_SIGNAL: d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); - d3d12_fence_decref(op->u.signal.fence); break;
case VKD3D_CS_OP_EXECUTE: d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); break;
+ case VKD3D_CS_OP_UPDATE_MAPPINGS: + FIXME("Tiled resource binding is not supported yet.\n"); + update_mappings_cleanup(&op->u.update_mappings); + break; + + case VKD3D_CS_OP_COPY_MAPPINGS: + FIXME("Tiled resource mapping copying is not supported yet.\n"); + break; + default: vkd3d_unreachable(); }
+ d3d12_command_queue_destroy_op(op); + *flushed_any |= true; }
@@ -7000,6 +7252,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) goto fail_destroy_op_mutex;
+ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); + d3d12_device_add_ref(queue->device = device);
return S_OK; @@ -7105,16 +7359,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSign TRACE("%p decreasing refcount to %u.\n", signature, refcount);
if (!refcount) - { - struct d3d12_device *device = signature->device; - - vkd3d_private_store_destroy(&signature->private_store); - - vkd3d_free((void *)signature->desc.pArgumentDescs); - vkd3d_free(signature); - - d3d12_device_release(device); - } + d3d12_command_signature_decref(signature);
return refcount; } @@ -7221,6 +7466,7 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_
object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; object->refcount = 1; + object->internal_refcount = 1;
object->desc = *desc; if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 4263dcf4184..c33061073a3 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -1464,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
vulkan_info->device_limits = physical_device_info->properties2.properties.limits; vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; + vulkan_info->sparse_binding = features->sparseBinding; + vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; @@ -2433,34 +2435,39 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device)
static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) { - cache->head = NULL; + memset(cache, 0, sizeof(*cache)); cache->size = size; }
static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) { union d3d12_desc_object u; + unsigned int i; void *next;
- for (u.object = cache->head; u.object; u.object = next) + for (i = 0; i < ARRAY_SIZE(cache->heads); ++i) { - next = u.header->next; - vkd3d_free(u.object); + for (u.object = cache->heads[i].head; u.object; u.object = next) + { + next = u.header->next; + vkd3d_free(u.object); + } } }
/* ID3D12Device */ -static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) +static inline struct d3d12_device *impl_from_ID3D12Device1(ID3D12Device1 *iface) { - return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device_iface); + return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device1_iface); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device1 *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);
- if (IsEqualGUID(riid, &IID_ID3D12Device) + if (IsEqualGUID(riid, &IID_ID3D12Device1) + || IsEqualGUID(riid, &IID_ID3D12Device) || IsEqualGUID(riid, &IID_ID3D12Object) || IsEqualGUID(riid, &IID_IUnknown)) { @@ -2475,9 +2482,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface return E_NOINTERFACE; }
-static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device1 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); ULONG refcount = InterlockedIncrement(&device->refcount);
TRACE("%p increasing refcount to %u.\n", device, refcount); @@ -2485,9 +2492,9 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) return refcount; }
-static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device1 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); ULONG refcount = InterlockedDecrement(&device->refcount);
TRACE("%p decreasing refcount to %u.\n", device, refcount); @@ -2521,10 +2528,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) return refcount; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device1 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -2532,10 +2539,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface return vkd3d_get_private_data(&device->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device1 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -2543,19 +2550,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface return vkd3d_set_private_data(&device->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device1 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);
return vkd3d_set_private_data_interface(&device->private_store, guid, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device1 *iface, const WCHAR *name) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size));
@@ -2563,17 +2570,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); }
-static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device *iface) +static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device1 *iface) { TRACE("iface %p.\n", iface);
return 1; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device1 *iface, const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_command_queue *object; HRESULT hr;
@@ -2587,10 +2594,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *i riid, command_queue); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device1 *iface, D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_command_allocator *object; HRESULT hr;
@@ -2604,10 +2611,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic riid, command_allocator); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device1 *iface, const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_pipeline_state *object; HRESULT hr;
@@ -2621,10 +2628,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 &IID_ID3D12PipelineState, riid, pipeline_state); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device1 *iface, const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_pipeline_state *object; HRESULT hr;
@@ -2638,11 +2645,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D &IID_ID3D12PipelineState, riid, pipeline_state); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device1 *iface, UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_command_list *object; HRESULT hr;
@@ -2655,8 +2662,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *if initial_pipeline_state, &object))) return hr;
- return return_interface(&object->ID3D12GraphicsCommandList2_iface, - &IID_ID3D12GraphicsCommandList2, riid, command_list); + return return_interface(&object->ID3D12GraphicsCommandList3_iface, + &IID_ID3D12GraphicsCommandList3, riid, command_list); }
/* Direct3D feature levels restrict which formats can be optionally supported. */ @@ -2765,10 +2772,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) return true; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device1 *iface, D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", iface, feature, feature_data, feature_data_size); @@ -3267,10 +3274,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device * } }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device1 *iface, const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_descriptor_heap *object; HRESULT hr;
@@ -3284,7 +3291,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device &IID_ID3D12DescriptorHeap, riid, descriptor_heap); }
-static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device *iface, +static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device1 *iface, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); @@ -3307,11 +3314,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D } }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device1 *iface, UINT node_mask, const void *bytecode, SIZE_T bytecode_length, REFIID riid, void **root_signature) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_root_signature *object; HRESULT hr;
@@ -3327,10 +3334,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device * &IID_ID3D12RootSignature, riid, root_signature); }
-static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device1 *iface, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_desc tmp = {0};
TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); @@ -3339,11 +3346,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device1 *iface, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_desc tmp = {0};
TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", @@ -3353,11 +3360,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device1 *iface, ID3D12Resource *resource, ID3D12Resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_desc tmp = {0};
TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", @@ -3368,7 +3375,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device1 *iface, ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -3376,10 +3383,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device * iface, resource, desc, descriptor.ptr);
d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); }
-static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device1 *iface, ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -3387,13 +3394,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device * iface, resource, desc, descriptor.ptr);
d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); }
-static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device1 *iface, const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_desc tmp = {0};
TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); @@ -3402,16 +3409,17 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device1 *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, const UINT *src_descriptor_range_sizes, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; unsigned int dst_range_size, src_range_size; + struct d3d12_descriptor_heap *dst_heap; const struct d3d12_desc *src; struct d3d12_desc *dst;
@@ -3441,13 +3449,14 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1;
dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); + dst_heap = d3d12_desc_get_descriptor_heap(dst); src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]);
for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) { if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) continue; - d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); + d3d12_desc_copy(&dst[dst_idx], &src[src_idx], dst_heap, device); }
if (dst_idx >= dst_range_size) @@ -3463,7 +3472,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, } }
-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device1 *iface, UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) @@ -3478,10 +3487,10 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i }
static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( - ID3D12Device *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + ID3D12Device1 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, const D3D12_RESOURCE_DESC *resource_descs) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); const D3D12_RESOURCE_DESC *desc; uint64_t requested_alignment;
@@ -3554,10 +3563,10 @@ invalid: return info; }
-static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device *iface, +static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device1 *iface, D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); bool coherent;
TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", @@ -3597,12 +3606,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope return heap_properties; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device1 *iface, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_resource *object; HRESULT hr;
@@ -3621,10 +3630,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device1 *iface, const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_heap *object; HRESULT hr;
@@ -3640,12 +3649,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device1 *iface, ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_heap *heap_object; struct d3d12_resource *object; HRESULT hr; @@ -3664,11 +3673,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device1 *iface, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_resource *object; HRESULT hr;
@@ -3682,11 +3691,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device1 *iface, ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, const WCHAR *name, HANDLE *handle) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", iface, object, attributes, access, debugstr_w(name, device->wchar_size), handle); @@ -3694,7 +3703,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *i return E_NOTIMPL; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device1 *iface, HANDLE handle, REFIID riid, void **object) { FIXME("iface %p, handle %p, riid %s, object %p stub!\n", @@ -3703,10 +3712,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *ifa return E_NOTIMPL; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device1 *iface, const WCHAR *name, DWORD access, HANDLE *handle) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
FIXME("iface %p, name %s, access %#x, handle %p stub!\n", iface, debugstr_w(name, device->wchar_size), access, handle); @@ -3714,7 +3723,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic return E_NOTIMPL; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device1 *iface, UINT object_count, ID3D12Pageable * const *objects) { FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", @@ -3723,7 +3732,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, return S_OK; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device1 *iface, UINT object_count, ID3D12Pageable * const *objects) { FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", @@ -3732,10 +3741,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, return S_OK; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device1 *iface, UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_fence *object; HRESULT hr;
@@ -3745,24 +3754,24 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object))) return hr;
- return return_interface(&object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence); + return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) +static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device1 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p.\n", iface);
return device->removed_reason; }
-static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device1 *iface, const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; unsigned int width, height, depth, plane_count, sub_resources_per_plane; @@ -3842,10 +3851,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *i *total_bytes = total; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device1 *iface, const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_query_heap *object; HRESULT hr;
@@ -3858,18 +3867,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *ifac return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device *iface, BOOL enable) +static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device1 *iface, BOOL enable) { FIXME("iface %p, enable %#x stub!\n", iface, enable);
return E_NOTIMPL; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device1 *iface, const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, REFIID iid, void **command_signature) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_command_signature *object; HRESULT hr;
@@ -3883,23 +3892,29 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic &IID_ID3D12CommandSignature, iid, command_signature); }
-static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device1 *iface, ID3D12Resource *resource, UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, D3D12_SUBRESOURCE_TILING *sub_resource_tilings) { - FIXME("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " + const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " "standard_title_shape %p, sub_resource_tiling_count %p, " - "first_sub_resource_tiling %u, sub_resource_tilings %p stub!\n", + "first_sub_resource_tiling %u, sub_resource_tilings %p.\n", iface, resource, total_tile_count, packed_mip_info, standard_tile_shape, sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); + + d3d12_resource_get_tiling(device, resource_impl, total_tile_count, packed_mip_info, standard_tile_shape, + sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); }
-static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) +static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device1 *iface, LUID *luid) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, luid %p.\n", iface, luid);
@@ -3908,7 +3923,33 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, return luid; }
-static const struct ID3D12DeviceVtbl d3d12_device_vtbl = +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device1 *iface, + const void *blob, SIZE_T blob_size, REFIID iid, void **lib) +{ + FIXME("iface %p, blob %p, blob_size %lu, iid %s, lib %p stub!\n", iface, blob, blob_size, debugstr_guid(iid), lib); + + return DXGI_ERROR_UNSUPPORTED; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device1 *iface, + ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, + D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) +{ + FIXME("iface %p, fences %p, values %p, fence_count %u, flags %#x, event %p stub!\n", + iface, fences, values, fence_count, flags, event); + + return E_NOTIMPL; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device1 *iface, + UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) +{ + FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); + + return S_OK; +} + +static const struct ID3D12Device1Vtbl d3d12_device_vtbl = { /* IUnknown methods */ d3d12_device_QueryInterface, @@ -3957,14 +3998,18 @@ static const struct ID3D12DeviceVtbl d3d12_device_vtbl = d3d12_device_CreateCommandSignature, d3d12_device_GetResourceTiling, d3d12_device_GetAdapterLuid, + /* ID3D12Device1 methods */ + d3d12_device_CreatePipelineLibrary, + d3d12_device_SetEventOnMultipleFenceCompletion, + d3d12_device_SetResidencyPriority, };
-struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) +struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface) { if (!iface) return NULL; assert(iface->lpVtbl == &d3d12_device_vtbl); - return impl_from_ID3D12Device(iface); + return impl_from_ID3D12Device1(iface); }
static HRESULT d3d12_device_init(struct d3d12_device *device, @@ -3973,7 +4018,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr;
- device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; + device->ID3D12Device1_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1;
vkd3d_instance_incref(device->vkd3d_instance = instance); @@ -4170,28 +4215,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha
IUnknown *vkd3d_get_device_parent(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device);
return d3d12_device->parent; }
VkDevice vkd3d_get_vk_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device);
return d3d12_device->vk_device; }
VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device);
return d3d12_device->vk_physical_device; }
struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device);
return d3d12_device->vkd3d_instance; } diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index ea7b6859cc1..f3842958d96 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -779,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, VkImageFormatListCreateInfoKHR format_list; const struct vkd3d_format *format; VkImageCreateInfo image_info; + uint32_t count; VkResult vr;
if (resource) @@ -914,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) resource->flags |= VKD3D_RESOURCE_LINEAR_TILING;
+ if (sparse_resource) + { + count = 0; + VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, + image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); + + if (!count) + { + FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", + image_info.format, image_info.imageType, image_info.samples, image_info.usage); + return E_INVALIDARG; + } + } + if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) WARN("Failed to create Vulkan image, vr %d.\n", vr);
@@ -928,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, D3D12_RESOURCE_DESC validated_desc; VkMemoryRequirements requirements; VkImage vk_image; + bool tiled; HRESULT hr;
assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); @@ -940,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, desc = &validated_desc; }
+ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; + /* XXX: We have to create an image to get its memory requirements. */ - if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) + if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? NULL : &heap_properties, 0, desc, NULL, &vk_image))) { VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); @@ -953,6 +971,11 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, return hr; }
+static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) +{ + vkd3d_free(resource->tiles.subresources); +} + static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -968,6 +991,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 else VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
+ d3d12_resource_tile_info_cleanup(resource); + if (resource->heap) d3d12_heap_resource_destroyed(resource->heap); } @@ -1039,12 +1064,196 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, box->back = d3d12_resource_desc_get_depth(&resource->desc, level); }
-/* ID3D12Resource */ -static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) +static void compute_image_subresource_size_in_tiles(const VkExtent3D *tile_extent, + const struct D3D12_RESOURCE_DESC *desc, unsigned int miplevel_idx, + struct vkd3d_tiled_region_extent *size) { - return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); + unsigned int width, height, depth; + + width = d3d12_resource_desc_get_width(desc, miplevel_idx); + height = d3d12_resource_desc_get_height(desc, miplevel_idx); + depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); + size->width = (width + tile_extent->width - 1) / tile_extent->width; + size->height = (height + tile_extent->height - 1) / tile_extent->height; + size->depth = (depth + tile_extent->depth - 1) / tile_extent->depth; +} + +void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, + UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *subresource_tiling_count, UINT first_subresource_tiling, + D3D12_SUBRESOURCE_TILING *subresource_tilings) +{ + unsigned int i, subresource, subresource_count, miplevel_idx, count; + const struct vkd3d_subresource_tile_info *tile_info; + const VkExtent3D *tile_extent; + + tile_extent = &resource->tiles.tile_extent; + + if (packed_mip_info) + { + packed_mip_info->NumStandardMips = resource->tiles.standard_mip_count; + packed_mip_info->NumPackedMips = resource->desc.MipLevels - packed_mip_info->NumStandardMips; + packed_mip_info->NumTilesForPackedMips = !!resource->tiles.packed_mip_tile_count; /* non-zero dummy value */ + packed_mip_info->StartTileIndexInOverallResource = packed_mip_info->NumPackedMips + ? resource->tiles.subresources[resource->tiles.standard_mip_count].offset : 0; + } + + if (standard_tile_shape) + { + /* D3D12 docs say tile shape is cleared to zero if there is no standard mip, but drivers don't do this. 
*/ + standard_tile_shape->WidthInTexels = tile_extent->width; + standard_tile_shape->HeightInTexels = tile_extent->height; + standard_tile_shape->DepthInTexels = tile_extent->depth; + } + + if (total_tile_count) + *total_tile_count = resource->tiles.total_count; + + if (!subresource_tiling_count) + return; + + subresource_count = resource->tiles.subresource_count; + + count = subresource_count - min(first_subresource_tiling, subresource_count); + count = min(count, *subresource_tiling_count); + + for (i = 0; i < count; ++i) + { + subresource = i + first_subresource_tiling; + miplevel_idx = subresource % resource->desc.MipLevels; + if (miplevel_idx >= resource->tiles.standard_mip_count) + { + memset(&subresource_tilings[i], 0, sizeof(subresource_tilings[i])); + subresource_tilings[i].StartTileIndexInOverallResource = D3D12_PACKED_TILE; + continue; + } + + tile_info = &resource->tiles.subresources[subresource]; + subresource_tilings[i].StartTileIndexInOverallResource = tile_info->offset; + subresource_tilings[i].WidthInTiles = tile_info->extent.width; + subresource_tilings[i].HeightInTiles = tile_info->extent.height; + subresource_tilings[i].DepthInTiles = tile_info->extent.depth; + } + *subresource_tiling_count = i; }
+static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device) +{ + unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSparseImageMemoryRequirements *sparse_requirements_array; + VkSparseImageMemoryRequirements sparse_requirements = {0}; + struct vkd3d_subresource_tile_info *tile_info; + VkMemoryRequirements requirements; + const VkExtent3D *tile_extent; + uint32_t requirement_count; + + subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); + + if (!(resource->tiles.subresources = vkd3d_calloc(subresource_count, sizeof(*resource->tiles.subresources)))) + { + ERR("Failed to allocate subresource info array.\n"); + return false; + } + + if (d3d12_resource_is_buffer(resource)) + { + assert(subresource_count == 1); + + VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); + if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) + FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); + + tile_info = &resource->tiles.subresources[0]; + tile_info->offset = 0; + tile_info->extent.width = align(resource->desc.Width, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) + / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + tile_info->extent.height = 1; + tile_info->extent.depth = 1; + tile_info->count = tile_info->extent.width; + + resource->tiles.tile_extent.width = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + resource->tiles.tile_extent.height = 1; + resource->tiles.tile_extent.depth = 1; + resource->tiles.total_count = tile_info->extent.width; + resource->tiles.subresource_count = 1; + resource->tiles.standard_mip_count = 1; + resource->tiles.packed_mip_tile_count = 0; + } + else + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, resource->u.vk_image, &requirements)); + if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) + 
FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); + + requirement_count = 0; + VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, &requirement_count, NULL)); + if (!(sparse_requirements_array = vkd3d_calloc(requirement_count, sizeof(*sparse_requirements_array)))) + { + ERR("Failed to allocate sparse requirements array.\n"); + return false; + } + VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, + &requirement_count, sparse_requirements_array)); + + for (i = 0; i < requirement_count; ++i) + { + if (sparse_requirements_array[i].formatProperties.aspectMask & resource->format->vk_aspect_mask) + { + if (sparse_requirements.formatProperties.aspectMask) + { + WARN("Ignoring properties for aspect mask %#x.\n", + sparse_requirements_array[i].formatProperties.aspectMask); + } + else + { + sparse_requirements = sparse_requirements_array[i]; + } + } + } + vkd3d_free(sparse_requirements_array); + if (!sparse_requirements.formatProperties.aspectMask) + { + WARN("Failed to get sparse requirements.\n"); + return false; + } + + resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; + resource->tiles.subresource_count = subresource_count; + resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize + ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; + resource->tiles.packed_mip_tile_count = (resource->tiles.standard_mip_count < resource->desc.MipLevels) + ? 
sparse_requirements.imageMipTailSize / requirements.alignment : 0; + + for (i = 0, start_idx = 0; i < subresource_count; ++i) + { + miplevel_idx = i % resource->desc.MipLevels; + + tile_extent = &sparse_requirements.formatProperties.imageGranularity; + tile_info = &resource->tiles.subresources[i]; + compute_image_subresource_size_in_tiles(tile_extent, &resource->desc, miplevel_idx, &tile_info->extent); + tile_info->offset = start_idx; + tile_info->count = 0; + + if (miplevel_idx < resource->tiles.standard_mip_count) + { + tile_count = tile_info->extent.width * tile_info->extent.height * tile_info->extent.depth; + start_idx += tile_count; + tile_info->count = tile_count; + } + else if (miplevel_idx == resource->tiles.standard_mip_count) + { + tile_info->count = 1; /* Non-zero dummy value */ + start_idx += 1; + } + } + resource->tiles.total_count = start_idx; + } + + return true; +} + +/* ID3D12Resource */ static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, REFIID riid, void **object) { @@ -1661,6 +1870,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d return E_INVALIDARG; }
+ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) + { + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) + { + WARN("The device does not support tiled 3D images.\n"); + return E_INVALIDARG; + } + if (format->plane_count > 1) + { + WARN("Invalid format %#x. D3D12 does not support multiplanar formats for tiled resources.\n", + format->dxgi_format); + return E_INVALIDARG; + } + } + if (!d3d12_resource_validate_texture_format(desc, format) || !d3d12_resource_validate_texture_alignment(desc, format)) return E_INVALIDARG; @@ -1722,6 +1946,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
resource->desc = *desc;
+ if (!heap_properties && !device->vk_info.sparse_binding) + { + WARN("The device does not support tiled images.\n"); + return E_INVALIDARG; + } + if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) return E_INVALIDARG;
@@ -1787,6 +2017,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 resource->heap = NULL; resource->heap_offset = 0;
+ memset(&resource->tiles, 0, sizeof(resource->tiles)); + if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) { d3d12_resource_destroy(resource, device); @@ -1972,6 +2204,12 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, desc, initial_state, optimized_clear_value, &object))) return hr;
+ if (!d3d12_resource_init_tiles(object, device)) + { + d3d12_resource_Release(&object->ID3D12Resource_iface); + return E_OUTOFMEMORY; + } + TRACE("Created reserved resource %p.\n", object);
*resource = object; @@ -1982,7 +2220,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, HRESULT vkd3d_create_image_resource(ID3D12Device *device, const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) { - struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device1((ID3D12Device1 *)device); struct d3d12_resource *object; HRESULT hr;
@@ -2044,38 +2282,67 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); }
-/* Objects are cached so that vkd3d_view_incref() can safely check the refcount - * of an object freed by another thread. */ +#define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1) + +/* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an + * object freed by another thread. This could be implemented as a single atomic linked + * list, but it requires handling the ABA problem, which brings issues with cross-platform + * support, compiler support, and non-universal x86-64 support for 128-bit CAS. */ static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) { union d3d12_desc_object u; - void *next; + unsigned int i;
- do + STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK)); + + i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; + for (;;) { - u.object = cache->head; - if (!u.object) + if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) + { + if ((u.object = cache->heads[i].head)) + { + vkd3d_atomic_decrement(&cache->free_count); + cache->heads[i].head = u.header->next; + vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + return u.object; + } + vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + } + /* Keeping a free count avoids uncertainty over when this loop should terminate, + * which could result in excess allocations gradually increasing without limit. */ + if (cache->free_count < ARRAY_SIZE(cache->heads)) return vkd3d_malloc(cache->size); - next = u.header->next; - } - while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next));
- return u.object; + i = (i + 1) & HEAD_INDEX_MASK; + } }
static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) { union d3d12_desc_object u = {object}; + unsigned int i; void *head;
- do + /* Using the same index as above may result in a somewhat uneven distribution, + * but the main objective is to avoid costly spinlock contention. */ + i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; + for (;;) { - head = cache->head; - u.header->next = head; + if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) + break; + i = (i + 1) & HEAD_INDEX_MASK; } - while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); + + head = cache->heads[i].head; + u.header->next = head; + cache->heads[i].head = u.object; + vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + vkd3d_atomic_increment(&cache->free_count); }
+#undef HEAD_INDEX_MASK + static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) { struct vkd3d_cbuffer_desc *desc; @@ -2368,13 +2635,11 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr descriptor_writes_free_object_refs(&writes, device); }
-static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) +static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_descriptor_heap *descriptor_heap) { - struct d3d12_descriptor_heap *descriptor_heap; unsigned int i, head;
i = dst->index; - descriptor_heap = d3d12_desc_get_descriptor_heap(dst); head = descriptor_heap->dirty_list_head;
/* Only one thread can swap the value away from zero. */ @@ -2388,14 +2653,20 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) } }
-void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) +static inline void descriptor_heap_write_atomic(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_desc *dst, + const struct d3d12_desc *src, struct d3d12_device *device) { void *object = src->s.u.object;
d3d12_desc_replace(dst, object, device); - if (device->use_vk_heaps && object && !dst->next) - d3d12_desc_mark_as_modified(dst); + if (descriptor_heap->use_vk_heaps && object && !dst->next) + d3d12_desc_mark_as_modified(dst, descriptor_heap); +} + +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + descriptor_heap_write_atomic(d3d12_desc_get_descriptor_heap(dst), dst, src, device); }
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) @@ -2403,7 +2674,9 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic d3d12_desc_replace(descriptor, NULL, device); }
-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, +/* This is a major performance bottleneck for some games, so do not load the device + * pointer from dst_heap. In some cases device will not be used. */ +void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, struct d3d12_device *device) { struct d3d12_desc tmp; @@ -2411,7 +2684,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, assert(dst != src);
tmp.s.u.object = d3d12_desc_get_object_ref(src, device); - d3d12_desc_write_atomic(dst, &tmp, device); + descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); }
static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, @@ -3810,7 +4083,15 @@ static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_Get
TRACE("iface %p, descriptor %p.\n", iface, descriptor);
- descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; + if (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) + { + descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; + } + else + { + WARN("Heap %p is not shader-visible.\n", iface); + descriptor->ptr = 0; + }
return descriptor; } @@ -3913,7 +4194,7 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets));
- if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + if (!descriptor_heap->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) return S_OK;
@@ -3944,6 +4225,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) return hr;
+ descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex);
diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 5e46b467252..7ae46c862cc 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -20,6 +20,7 @@
#include "vkd3d_private.h" #include "vkd3d_shaders.h" +#include "vkd3d_shader_utils.h"
/* ID3D12RootSignature */ static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) @@ -374,8 +375,8 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig
if (unbounded && range->OffsetInDescriptorsFromTableStart == D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) { - WARN("An unbounded range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " - "another unbounded range.\n"); + WARN("A range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " + "an unbounded range.\n"); return E_INVALIDARG; }
@@ -1978,14 +1979,14 @@ static HRESULT create_shader_stage(struct d3d12_device *device, compile_info.next = shader_interface; compile_info.source.code = code->pShaderBytecode; compile_info.source.size = code->BytecodeLength; - compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; compile_info.options = options; compile_info.option_count = ARRAY_SIZE(options); compile_info.log_level = VKD3D_SHADER_LOG_NONE; compile_info.source_name = NULL;
- if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) + if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 + || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) { WARN("Failed to compile shader, vkd3d result %d.\n", ret); return hresult_from_vkd3d_result(ret); @@ -2008,6 +2009,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER struct vkd3d_shader_scan_descriptor_info *descriptor_info) { struct vkd3d_shader_compile_info compile_info; + enum vkd3d_result ret;
const struct vkd3d_shader_compile_option options[] = { @@ -2019,13 +2021,15 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER compile_info.next = descriptor_info; compile_info.source.code = code->pShaderBytecode; compile_info.source.size = code->BytecodeLength; - compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; compile_info.options = options; compile_info.option_count = ARRAY_SIZE(options); compile_info.log_level = VKD3D_SHADER_LOG_NONE; compile_info.source_name = NULL;
+ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0) + return ret; + return vkd3d_shader_scan(&compile_info, NULL); }
diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c index 88301fbb313..159560afd8e 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c +++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info,
if (!device) { - ID3D12Device_Release(&object->ID3D12Device_iface); + ID3D12Device_Release(&object->ID3D12Device1_iface); return S_FALSE; }
- return return_interface(&object->ID3D12Device_iface, &IID_ID3D12Device, iid, device); + return return_interface(&object->ID3D12Device1_iface, &IID_ID3D12Device, iid, device); }
/* ID3D12RootSignatureDeserializer */ diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index e8d6371709c..363a7132c3a 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -147,9 +147,12 @@ struct vkd3d_vulkan_info unsigned int max_vertex_attrib_divisor;
VkPhysicalDeviceLimits device_limits; - VkPhysicalDeviceSparseProperties sparse_properties; struct vkd3d_device_descriptor_limits descriptor_limits;
+ VkPhysicalDeviceSparseProperties sparse_properties; + bool sparse_binding; + bool sparse_residency_3d; + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
unsigned int shader_extension_count; @@ -250,6 +253,11 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) { }
+static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) +{ + return InterlockedIncrement((LONG volatile *)x); +} + static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) { return InterlockedDecrement((LONG volatile *)x); @@ -384,6 +392,15 @@ static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) } # else # error "vkd3d_atomic_decrement() not implemented for this platform" +# endif /* HAVE_SYNC_SUB_AND_FETCH */ + +# if HAVE_SYNC_ADD_AND_FETCH +static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) +{ + return __sync_add_and_fetch(x, 1); +} +# else +# error "vkd3d_atomic_increment() not implemented for this platform" # endif /* HAVE_SYNC_ADD_AND_FETCH */
# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP @@ -599,10 +616,12 @@ struct vkd3d_signaled_semaphore /* ID3D12Fence */ struct d3d12_fence { - ID3D12Fence ID3D12Fence_iface; + ID3D12Fence1 ID3D12Fence1_iface; LONG internal_refcount; LONG refcount;
+ D3D12_FENCE_FLAGS flags; + uint64_t value; uint64_t max_pending_value; struct vkd3d_mutex mutex; @@ -670,6 +689,30 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010
+struct vkd3d_tiled_region_extent +{ + unsigned int width; + unsigned int height; + unsigned int depth; +}; + +struct vkd3d_subresource_tile_info +{ + unsigned int offset; + unsigned int count; + struct vkd3d_tiled_region_extent extent; +}; + +struct d3d12_resource_tile_info +{ + VkExtent3D tile_extent; + unsigned int total_count; + unsigned int standard_mip_count; + unsigned int packed_mip_tile_count; + unsigned int subresource_count; + struct vkd3d_subresource_tile_info *subresources; +}; + /* ID3D12Resource */ struct d3d12_resource { @@ -698,9 +741,16 @@ struct d3d12_resource
struct d3d12_device *device;
+ struct d3d12_resource_tile_info tiles; + struct vkd3d_private_store private_store; };
+static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); +} + static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) { return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; @@ -713,6 +763,10 @@ static inline bool d3d12_resource_is_texture(const struct d3d12_resource *resour
bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d12_device *device); +void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, + UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, + D3D12_SUBRESOURCE_TILING *sub_resource_tilings);
HRESULT d3d12_committed_resource_create(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, @@ -853,8 +907,9 @@ static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc * { do { - view = src->s.u.object; - } while (view && !vkd3d_view_incref(view)); + if (!(view = src->s.u.object)) + return NULL; + } while (!vkd3d_view_incref(view));
/* Check if the object is still in src to handle the case where it was * already freed and reused elsewhere when the refcount was incremented. */ @@ -880,7 +935,10 @@ static inline void d3d12_desc_copy_raw(struct d3d12_desc *dst, const struct d3d1 dst->s = src->s; }
-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +struct d3d12_descriptor_heap; + +void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, + struct d3d12_device *device); void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); void d3d12_desc_create_srv(struct d3d12_desc *descriptor, @@ -983,6 +1041,7 @@ struct d3d12_descriptor_heap D3D12_DESCRIPTOR_HEAP_DESC desc;
struct d3d12_device *device; + bool use_vk_heaps;
struct vkd3d_private_store private_store;
@@ -1367,7 +1426,7 @@ enum vkd3d_pipeline_bind_point /* ID3D12CommandList */ struct d3d12_command_list { - ID3D12GraphicsCommandList2 ID3D12GraphicsCommandList2_iface; + ID3D12GraphicsCommandList3 ID3D12GraphicsCommandList3_iface; LONG refcount;
D3D12_COMMAND_LIST_TYPE type; @@ -1454,6 +1513,8 @@ enum vkd3d_cs_op VKD3D_CS_OP_WAIT, VKD3D_CS_OP_SIGNAL, VKD3D_CS_OP_EXECUTE, + VKD3D_CS_OP_UPDATE_MAPPINGS, + VKD3D_CS_OP_COPY_MAPPINGS, };
struct vkd3d_cs_wait @@ -1474,6 +1535,30 @@ struct vkd3d_cs_execute unsigned int buffer_count; };
+struct vkd3d_cs_update_mappings +{ + struct d3d12_resource *resource; + struct d3d12_heap *heap; + D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; + D3D12_TILE_REGION_SIZE *region_sizes; + D3D12_TILE_RANGE_FLAGS *range_flags; + UINT *heap_range_offsets; + UINT *range_tile_counts; + UINT region_count; + UINT range_count; + D3D12_TILE_MAPPING_FLAGS flags; +}; + +struct vkd3d_cs_copy_mappings +{ + struct d3d12_resource *dst_resource; + struct d3d12_resource *src_resource; + D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; + D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; + D3D12_TILE_REGION_SIZE region_size; + D3D12_TILE_MAPPING_FLAGS flags; +}; + struct vkd3d_cs_op_data { enum vkd3d_cs_op opcode; @@ -1482,6 +1567,8 @@ struct vkd3d_cs_op_data struct vkd3d_cs_wait wait; struct vkd3d_cs_signal signal; struct vkd3d_cs_execute execute; + struct vkd3d_cs_update_mappings update_mappings; + struct vkd3d_cs_copy_mappings copy_mappings; } u; };
@@ -1519,6 +1606,8 @@ struct d3d12_command_queue * set, aux_op_queue.count must be zero. */ struct d3d12_command_queue_op_array aux_op_queue;
+ bool supports_sparse_binding; + struct vkd3d_private_store private_store; };
@@ -1530,6 +1619,7 @@ struct d3d12_command_signature { ID3D12CommandSignature ID3D12CommandSignature_iface; LONG refcount; + unsigned int internal_refcount;
D3D12_COMMAND_SIGNATURE_DESC desc;
@@ -1600,9 +1690,17 @@ struct vkd3d_uav_clear_state HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
+struct desc_object_cache_head +{ + void *head; + unsigned int spinlock; +}; + struct vkd3d_desc_object_cache { - void * volatile head; + struct desc_object_cache_head heads[16]; + unsigned int next_index; + unsigned int free_count; size_t size; };
@@ -1611,7 +1709,7 @@ struct vkd3d_desc_object_cache /* ID3D12Device */ struct d3d12_device { - ID3D12Device ID3D12Device_iface; + ID3D12Device1 ID3D12Device1_iface; LONG refcount;
VkDevice vk_device; @@ -1677,27 +1775,27 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); -struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface); +struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface);
static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) { - return ID3D12Device_QueryInterface(&device->ID3D12Device_iface, iid, object); + return ID3D12Device1_QueryInterface(&device->ID3D12Device1_iface, iid, object); }
static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) { - return ID3D12Device_AddRef(&device->ID3D12Device_iface); + return ID3D12Device1_AddRef(&device->ID3D12Device1_iface); }
static inline ULONG d3d12_device_release(struct d3d12_device *device) { - return ID3D12Device_Release(&device->ID3D12Device_iface); + return ID3D12Device1_Release(&device->ID3D12Device1_iface); }
static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) { - return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); + return ID3D12Device1_GetDescriptorHandleIncrementSize(&device->ID3D12Device1_iface, descriptor_type); }
/* utils */
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=136798
Your paranoid android.
=== debian11 (32 bit report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11 (32 bit ar:MA report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11 (32 bit de report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11 (32 bit fr report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11 (32 bit he:IL report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11 (32 bit hi:IN report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11 (32 bit ja:JP report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11 (32 bit zh:CN report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11b (32 bit WoW report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
=== debian11b (64 bit WoW report) ===
d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
d3dcompiler_46: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
d3dcompiler_47: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
On Tue Aug 29 10:22:07 2023 +0000, **** wrote:
Marvin replied on the mailing list:
Hi, It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated. The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details: The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=136798 Your paranoid android. === debian11 (32 bit report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11 (32 bit ar:MA report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. 
hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11 (32 bit de report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11 (32 bit fr report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. 
hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11 (32 bit he:IL report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11 (32 bit hi:IN report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11 (32 bit ja:JP report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. 
hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11 (32 bit zh:CN report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11b (32 bit WoW report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. 
hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. === debian11b (64 bit WoW report) === d3dcompiler_43: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. d3dcompiler_46: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. 
hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. d3dcompiler_47: hlsl_d3d9.c:561: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:585: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1180: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1180: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001.
AFAICS, most of those were "accidentally" passing with older vkd3d releases, but then vkd3d-shader started to properly return compilation failures on unsupported features so that now we don't pass these tests anymore. I think we want to restore todo_wines for those. I can write a patch for this, to be squashed into 3b2a47dec5747bbb3dd443fb3380efcd88686fa7, if you like. How do you prefer that I send it your way? I'll default to attaching it here if I don't hear anything :smile:
I can't seem to reproduce the test failures at line 1180 locally. The test results suggest that there is some surprising inaccuracy in the sampled texture color on the test machines. The generated pixel shaders look correct, though, and are virtually identical to the native-generated ones, so I guess we should just raise the test tolerance (the shader does no math and I don't see how wined3d could mess any of this up, so I'd blame the driver and move on...). For this one, that would mean raising the last argument to the compare_vec4() call on line 1180 to higher powers of 2 until the test passes.
On Thu Aug 31 08:28:27 2023 +0000, Matteo Bruni wrote:
AFAICS, most of those were "accidentally" passing with older vkd3d releases, but then vkd3d-shader started to properly return compilation failures on unsupported features so that now we don't pass these tests anymore. I think we want to restore todo_wines for those. I can write a patch for this, to be squashed into 3b2a47dec5747bbb3dd443fb3380efcd88686fa7, if you like. How do you prefer that I send it your way? I'll default to attaching it here if I don't hear anything :smile: I don't seem to reproduce the test failures at line 1180 locally. The test results suggest that there is some surprising inaccuracy in the sampled texture color on the test machines. The generated pixel shaders look correct though and are virtually identical to the native-generated ones, so I guess we should just raise the test tolerance (i.e. the shader does no math, I don't see how wined3d could mess any of this up, I'd blame the driver and move on...) For this one it would be raising the last argument to the compare_vec4() call on line 1180 to higher powers of 2 until the test passes.
Listing the new compilation errors for these shaders for reference: <pre> vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:3:26: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:3:1: E5017: Aborting due to not yet implemented feature: Instruction type HLSL_IR_IF. hlsl_d3d9.c:561: <anonymous>:3:26: E5017: Aborting due to not yet implemented feature: SM1 non-float expression.
vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:3:18: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:3:18: E5017: Aborting due to not yet implemented feature: Instruction type HLSL_IR_IF. hlsl_d3d9.c:585: <anonymous>:3:18: E5017: Aborting due to not yet implemented feature: SM1 non-float expression.
vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:5:21: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:5:19: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:5:19: E5017: Aborting due to not yet implemented feature: SM1 "cast" expression. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001.
vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:4:21: E5017: Aborting due to not yet implemented feature: SM1 "sin" expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:5:21: E5017: Aborting due to not yet implemented feature: SM1 "cos" expression. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001.
vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:1:9: E5017: Aborting due to not yet implemented feature: Generic samplers need to be lowered. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. </pre> The last one refers to the last two shaders in test_samplers(), which use a separate sampler and texture via the newer t.Sample() syntax.
On Thu Aug 31 08:35:24 2023 +0000, Matteo Bruni wrote:
Listing the new compilation errors for these shaders for reference:
<pre> vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:3:26: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:3:1: E5017: Aborting due to not yet implemented feature: Instruction type HLSL_IR_IF. hlsl_d3d9.c:561: <anonymous>:3:26: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:3:18: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:3:18: E5017: Aborting due to not yet implemented feature: Instruction type HLSL_IR_IF. hlsl_d3d9.c:585: <anonymous>:3:18: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:5:21: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:5:19: E5017: Aborting due to not yet implemented feature: SM1 non-float expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:5:19: E5017: Aborting due to not yet implemented feature: SM1 "cast" expression. hlsl_d3d9.c:659: Test failed: Failed to compile shader, hr 0x80004001. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:4:21: E5017: Aborting due to not yet implemented feature: SM1 "sin" expression. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:5:21: E5017: Aborting due to not yet implemented feature: SM1 "cos" expression. hlsl_d3d9.c:702: Test failed: Failed to compile shader, hr 0x80004001. vkd3d:0124:trace:vkd3d_shader_compile <anonymous>:1:9: E5017: Aborting due to not yet implemented feature: Generic samplers need to be lowered. hlsl_d3d9.c:1174: Test failed: Failed to compile shader, hr 0x80004001. </pre>
The last one refers to the last two shaders in test_samplers(), which use a separate sampler and texture via the newer t.Sample() syntax.
Here's the fixup for the shader compilation failures: [0001-d3dcompiler-tests-Reintroduce-a-few-todo_wine.txt](/uploads/2958da7c60dff88bd0bfccd39705131a/0001-d3dcompiler-tests-Reintroduce-a-few-todo_wine.txt)
For this one, the fix would be to raise the last argument to the compare_vec4() call on line 1180 to higher powers of 2 until the test passes.
It turns out this only starts passing with 256 * 1024 ULPs of tolerance, which seems insane to me. I'll try to change the test to avoid interpolation from the magnification filter instead.
For this one, the fix would be to raise the last argument to the compare_vec4() call on line 1180 to higher powers of 2 until the test passes.
It turns out this only starts passing with 256 * 1024 ULPs of tolerance, which seems insane to me. I'll try to change the test to avoid interpolation from the magnification filter instead.
I opened !3694 to address that; it looks like a much better approach.